diff --git a/ccminer.cpp b/ccminer.cpp
index b7f182d0..dc69ea3d 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -71,6 +71,7 @@ void cuda_devicereset();
int cuda_finddevice(char *name);
void cuda_print_devices();
void cuda_get_device_sm();
+void cuda_reset_device(int thr_id, bool *init);
#include "nvml.h"
#ifdef USE_WRAPNVML
@@ -90,6 +91,11 @@ struct workio_cmd {
} u;
};
+bool opt_debug_diff = false;
+bool opt_debug_threads = false;
+bool opt_showdiff = true;
+bool opt_hwmonitor = true;
+
static const char *algo_names[] = {
"bitcoin",
"blake",
@@ -149,6 +155,7 @@ static json_t *opt_config = nullptr;
static const bool opt_time = true;
enum sha_algos opt_algo;
int opt_n_threads = 0;
+int gpu_threads = 1;
int opt_affinity = -1;
int opt_priority = 0;
static double opt_difficulty = 1; // CH
@@ -156,14 +163,21 @@ static bool opt_extranonce = true;
bool opt_trust_pool = false;
int num_cpus;
int active_gpus;
+bool need_nvsettings = false;
+bool need_memclockrst = false;
char * device_name[MAX_GPUS] = { nullptr };
int device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = {0};
+int32_t device_mem_offsets[MAX_GPUS] = {0};
uint32_t device_gpu_clocks[MAX_GPUS] = {0};
uint32_t device_mem_clocks[MAX_GPUS] = {0};
uint32_t device_plimit[MAX_GPUS] = {0};
int8_t device_pstate[MAX_GPUS];
+int32_t device_led[MAX_GPUS] = {-1, -1};
+int opt_led_mode = 0;
+int opt_cudaschedule = -1;
+uint8_t device_tlimit[MAX_GPUS] = {0};
char *rpc_user = NULL;
static char *rpc_url = nullptr;
static char *rpc_userpass = nullptr;
@@ -179,16 +193,17 @@ int longpoll_thr_id = -1;
int stratum_thr_id = -1;
int api_thr_id = -1;
bool stratum_need_reset = false;
+volatile bool abort_flag = false;
struct work_restart *work_restart = NULL;
struct stratum_ctx stratum = { 0 };
bool stop_mining = false;
volatile bool mining_has_stopped[MAX_GPUS];
pthread_mutex_t applog_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
uint32_t accepted_count = 0L;
uint32_t rejected_count = 0L;
-static double thr_hashrates[MAX_GPUS];
+double thr_hashrates[MAX_GPUS];
uint64_t global_hashrate = 0;
double global_diff = 0.0;
uint64_t net_hashrate = 0;
@@ -291,8 +306,8 @@ Options:\n\
"\
--mem-clock=N Set the gpu memory max clock (346.72+ driver)\n\
--gpu-clock=N Set the gpu engine max clock (346.72+ driver)\n\
- --pstate=N Set the gpu power state (352.21+ driver)\n\
- --plimit=N Set the gpu power limit(352.21 + driver)\n"
+ --pstate=N (not for 10xx cards) Set the gpu power state (352.21+ driver)\n\
+ --plimit=N Set the gpu power limit (352.21+ driver)\n"
#endif
"";
@@ -463,6 +478,26 @@ void proper_exit(int reason)
#ifdef WIN32
timeEndPeriod(1);
#endif
+#ifdef USE_WRAPNVML
+ if(hnvml)
+ {
+ for(int n = 0; n < opt_n_threads; n++)
+ {
+ nvml_reset_clocks(hnvml, device_map[n]);
+ }
+ nvml_destroy(hnvml);
+ }
+ if(need_memclockrst)
+ {
+# ifdef WIN32
+ for(int n = 0; n < opt_n_threads; n++)
+ {
+ nvapi_toggle_clocks(n, false);
+ }
+# endif
+ }
+#endif
+
sleep(1);
exit(reason);
}
@@ -2538,7 +2573,11 @@ static void parse_arg(int key, char *arg)
while(pch != NULL && n < MAX_GPUS)
{
int dev_id = device_map[n++];
- device_mem_clocks[dev_id] = atoi(pch);
+ if(*pch == '+' || *pch == '-')
+ device_mem_offsets[dev_id] = atoi(pch);
+ else
+ device_mem_clocks[dev_id] = atoi(pch);
+ need_nvsettings = true;
pch = strtok(NULL, ",");
}
}
@@ -2804,28 +2843,6 @@ int main(int argc, char *argv[])
cuda_devicenames();
-#ifdef USE_WRAPNVML
-#if defined(__linux__) || defined(_WIN64)
- /* nvml is currently not the best choice on Windows (only in x64) */
- hnvml = nvml_create();
- if(hnvml)
- {
- bool gpu_reinit = false;// (opt_cudaschedule >= 0);
- cuda_devicenames(); // refresh gpu vendor name
- applog(LOG_INFO, "NVML GPU monitoring enabled.");
- }
-#endif
-#ifdef WIN32
- if(!hnvml && nvapi_init() == 0)
- {
- applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
- cuda_devicenames(); // refresh gpu vendor name
- }
-#endif
- else if(!hnvml)
- applog(LOG_INFO, "GPU monitoring is not available.");
-#endif
-
if(opt_protocol)
{
curl_version_info_data *info;
@@ -3025,26 +3042,63 @@ int main(int argc, char *argv[])
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
}
+
+#ifdef __linux__
+ if(need_nvsettings)
+ {
+ if(nvs_init() < 0)
+ need_nvsettings = false;
+ }
+#endif
+
#ifdef USE_WRAPNVML
#if defined(__linux__) || defined(_WIN64)
/* nvml is currently not the best choice on Windows (only in x64) */
- if (hnvml) {
- bool gpu_reinit = false;// (opt_cudaschedule >= 0);
+ hnvml = nvml_create();
+ if(hnvml)
+ {
+ bool gpu_reinit = (opt_cudaschedule >= 0); //false
+ cuda_devicenames(); // refresh gpu vendor name
+ if(!opt_quiet)
+ applog(LOG_INFO, "NVML GPU monitoring enabled.");
for(int n = 0; n < active_gpus; n++)
{
if(nvml_set_pstate(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if(nvml_set_plimit(hnvml, device_map[n]) == 1)
gpu_reinit = true;
- if(nvml_set_clocks(hnvml, device_map[n]) == 1)
+ if(!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if(gpu_reinit)
{
-// cuda_reset_device(n, NULL);
+ cuda_reset_device(n, NULL);
}
}
}
#endif
+#ifdef WIN32
+ if(nvapi_init() == 0)
+ {
+ if(!opt_quiet)
+ applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
+ if(!hnvml)
+ {
+ cuda_devicenames(); // refresh gpu vendor name
+ }
+ nvapi_init_settings();
+ }
+#endif
+ else if(!hnvml && !opt_quiet)
+ applog(LOG_INFO, "GPU monitoring is not available.");
+
+ // force reinit to set default device flags
+ if(opt_cudaschedule >= 0 && !hnvml)
+ {
+ for(int n = 0; n < active_gpus; n++)
+ {
+ cuda_reset_device(n, NULL);
+ }
+ }
#endif
if(opt_api_listen)
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index fe8582ee..8a62ede9 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -285,6 +285,7 @@
+
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 55ee3a64..caa2c361 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -207,6 +207,9 @@
Source Files
+
+ Source Files
+
diff --git a/cuda.cpp b/cuda.cpp
index 4d26f83d..b65ca872 100644
--- a/cuda.cpp
+++ b/cuda.cpp
@@ -281,3 +281,11 @@ double throughput2intensity(uint32_t throughput)
}
return intensity;
}
+
+void cuda_reset_device(int thr_id, bool *init)
+{
+ int dev_id = device_map[thr_id];
+ cudaSetDevice(dev_id);
+ cudaDeviceReset();
+ cudaDeviceSynchronize();
+}
diff --git a/miner.h b/miner.h
index 66575b31..2e5dbce6 100644
--- a/miner.h
+++ b/miner.h
@@ -75,9 +75,11 @@ void *alloca (size_t);
#ifdef HAVE_SYSLOG_H
#include
-#define LOG_BLUE 0x10 /* unique value */
+#define LOG_BLUE 0x10
+#define LOG_RAW 0x99
#else
-enum {
+enum
+{
LOG_ERR,
LOG_WARNING,
LOG_NOTICE,
@@ -85,6 +87,7 @@ enum {
LOG_DEBUG,
/* custom notices */
LOG_BLUE = 0x10,
+ LOG_RAW = 0x99
};
#endif
@@ -481,6 +484,7 @@ struct thr_info {
extern int cuda_num_devices();
extern int cuda_version();
extern int cuda_gpu_clocks(struct cgpu_info *gpu);
+int cuda_gpu_info(struct cgpu_info *gpu);
extern bool opt_verify;
extern bool opt_benchmark;
extern bool opt_debug;
@@ -507,7 +511,7 @@ extern int longpoll_thr_id;
extern int stratum_thr_id;
extern int api_thr_id;
extern bool opt_trust_pool;
-
+extern volatile bool abort_flag;
extern uint64_t global_hashrate;
extern double global_diff;
@@ -515,8 +519,10 @@ extern double global_diff;
extern char* device_name[MAX_GPUS];
extern int device_map[MAX_GPUS];
extern long device_sm[MAX_GPUS];
+extern uint32_t device_plimit[MAX_GPUS];
extern uint32_t gpus_intensity[MAX_GPUS];
double throughput2intensity(uint32_t throughput);
+extern void gpulog(int prio, int thr_id, const char *fmt, ...);
#define CL_N "\x1B[0m"
#define CL_RED "\x1B[31m"
diff --git a/nvml.cpp b/nvml.cpp
index bce34e99..7b7d3227 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
@@ -34,19 +34,19 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
extern uint32_t device_gpu_clocks[MAX_GPUS];
extern uint32_t device_mem_clocks[MAX_GPUS];
-extern uint32_t device_plimit[MAX_GPUS];
+extern int32_t device_mem_offsets[MAX_GPUS];
+extern uint8_t device_tlimit[MAX_GPUS];
extern int8_t device_pstate[MAX_GPUS];
+extern int32_t device_led[MAX_GPUS];
+int32_t device_led_state[MAX_GPUS] = { 0 };
+static THREAD bool has_rgb_ok = false;
uint32_t clock_prev[MAX_GPUS] = { 0 };
uint32_t clock_prev_mem[MAX_GPUS] = { 0 };
uint32_t limit_prev[MAX_GPUS] = { 0 };
-static bool nvml_plimit_set = false;
-#ifdef WIN32
-#include "nvapi/nvapi_ccminer.h"
-static int nvapi_dev_map[MAX_GPUS] = {0};
-static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = {0};
-#endif
+static bool nvml_plimit_set = false;
+extern bool need_memclockrst;
/*
* Wrappers to emulate dlopen() on other systems like Windows
@@ -94,7 +94,7 @@ nvml_handle * nvml_create()
int i=0;
nvml_handle *nvmlh = NULL;
-#if defined(WIN32)
+#ifdef WIN32
/* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */
#define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll"
#else
@@ -195,14 +195,20 @@ nvml_handle * nvml_create()
nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit");
// v340
- /* NVML_ERROR_NOT_SUPPORTED
- nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled))
- wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled");
- nvmlh->nvmlDeviceSetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t enabled))
- wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAutoBoostedClocksEnabled"); */
+#ifdef __linux__
+ nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t))
+ wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity");
+ nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet))
+ wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");
+ nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t))
+ wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");
+#endif
// v346
nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput");
+ // v36x (API 8 / Pascal)
+ nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz))
+ wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock");
if (nvmlh->nvmlInit == NULL ||
nvmlh->nvmlShutdown == NULL ||
@@ -218,20 +224,11 @@ nvml_handle * nvml_create()
free(nvmlh);
return NULL;
}
- nvmlReturn_t rc;
- rc = nvmlh->nvmlInit();
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "nvmlInit() failed: %s", nvmlh->nvmlErrorString(rc));
- return NULL;
- }
- rc = nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version));
- if(rc != NVML_SUCCESS)
- applog(LOG_WARNING, "nvmlSystemGetDriverVersion() failed: %s", nvmlh->nvmlErrorString(rc));
- rc = nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
- if(rc != NVML_SUCCESS)
- applog(LOG_WARNING, "nvmlDeviceGetCount() failed: %s", nvmlh->nvmlErrorString(rc));
+ nvmlh->nvmlInit();
+ if (nvmlh->nvmlSystemGetDriverVersion)
+ nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version));
+ nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
/* Query CUDA device count, in case it doesn't agree with NVML, since */
/* CUDA will only report GPUs with compute capability greater than 1.0 */
@@ -247,17 +244,15 @@ nvml_handle * nvml_create()
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
+ nvmlh->nvml_pci_vendor_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t));
/* Obtain GPU device handles we're going to need repeatedly... */
- for (i=0; invml_gpucount; i++)
- {
- rc = nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
- if(rc != NVML_SUCCESS)
- applog(LOG_WARNING, "GPU %d: nvmlDeviceGetHandleByIndex() failed: %s", i, nvmlh->nvmlErrorString(rc));
+ for (i=0; invml_gpucount; i++) {
+ nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
}
/* Query PCI info for each NVML device, and build table for mapping of */
@@ -269,23 +264,18 @@ nvml_handle * nvml_create()
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
nvmlh->nvml_pci_device_id[i] = pciinfo.device;
+ nvmlh->nvml_pci_vendor_id[i] = pciinfo.pci_device_id;
nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id;
nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN;
- if (nvmlh->nvmlDeviceSetAPIRestriction)
- {
- rc = nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
+ if (nvmlh->nvmlDeviceSetAPIRestriction) {
+ nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
NVML_FEATURE_ENABLED);
- if(rc != NVML_SUCCESS && opt_debug)
- applog(LOG_WARNING, "Device %d: nvmlDeviceSetAPIRestriction() failed: %s", nvmlh->devs[i], nvmlh->nvmlErrorString(rc));
/* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */
}
- if (nvmlh->nvmlDeviceGetAPIRestriction)
- {
- rc = nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
+ if (nvmlh->nvmlDeviceGetAPIRestriction) {
+ nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
&nvmlh->app_clocks[i]);
- if(rc != NVML_SUCCESS)
- applog(LOG_WARNING, "Device %d: nvmlDeviceGetAPIRestriction() failed: %s", nvmlh->devs[i], nvmlh->nvmlErrorString(rc));
}
}
@@ -316,30 +306,14 @@ nvml_handle * nvml_create()
return nvmlh;
}
-#ifdef WIN32
-// Replacement for WIN32 CUDA 6.5 on pascal
-int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
-{
- NvAPI_Status ret = NVAPI_OK;
- NV_DISPLAY_DRIVER_MEMORY_INFO mem = {0};
- mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
- unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
- if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK)
- {
- *total = (uint64_t)mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
- *free = (uint64_t)mem.curAvailableDedicatedVideoMemory;
- }
- return (int)ret;
-}
-#endif
-
-#define MAXCLOCKS 255
/* apply config clocks to an used device */
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
{
nvmlReturn_t rc;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
+ //if (need_nvsettings) /* prefer later than init time */
+ // nvs_set_clocks(dev_id);
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -ENODEV;
@@ -352,36 +326,17 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
}
uint32_t mem_prev = clock_prev_mem[dev_id];
- if(!mem_prev)
- {
- rc = nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: unable to query memory clock", dev_id);
- return -1;
- }
- }
+ if (!mem_prev)
+ nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev);
uint32_t gpu_prev = clock_prev[dev_id];
- if(!gpu_prev)
- {
- rc = nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: unable to query graphics clock", dev_id);
- return -1;
- }
- }
+ if (!gpu_prev)
+ nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev);
- rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: unable to query default memory clock", dev_id);
- return -1;
- }
+ nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
if (rc != NVML_SUCCESS) {
- applog(LOG_WARNING, "GPU #%d: unable to query default graphics clock", dev_id);
- return -1;
+ applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
+ return -EINVAL;
}
if (opt_debug)
@@ -391,45 +346,43 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
- // these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
- uint32_t nclocks = MAXCLOCKS;
- uint32_t clocks[MAXCLOCKS] = {0};
-
- rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: unable to query supported memory clocks", dev_id);
- return -1;
- }
+ // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+
+ uint32_t nclocks = 0, mem_clocks[32] = { 0 };
+ nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
+ nclocks = min(nclocks, 32);
+ if (nclocks)
+ nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks);
for (uint8_t u=0; u < nclocks; u++) {
// ordered by pstate (so highest is first memory clock - P0)
- if(clocks[u] <= mem_clk)
- {
- mem_clk = clocks[u];
+ if (mem_clocks[u] <= mem_clk) {
+ mem_clk = mem_clocks[u];
break;
}
}
- nclocks = MAXCLOCKS;
- rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: unable to query supported graphics clocks", dev_id);
- return -1;
- }
- for (uint8_t u=0; u < nclocks; u++) {
- // ordered desc, so get first
- if (clocks[u] <= gpu_clk) {
- gpu_clk = clocks[u];
- break;
+ uint32_t* gpu_clocks = NULL;
+ nclocks = 0;
+ nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
+ if (nclocks) {
+ if (opt_debug)
+ applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk);
+ gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4);
+ nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks);
+ for (uint8_t u=0; u < nclocks; u++) {
+ // ordered desc, so get first
+ if (gpu_clocks[u] <= gpu_clk) {
+ gpu_clk = gpu_clocks[u];
+ break;
+ }
}
+ free(gpu_clocks);
}
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
if (rc == NVML_SUCCESS)
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
else {
- applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
+ applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
return -1;
}
@@ -446,6 +399,8 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
nvmlReturn_t rc;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
+ if (need_nvsettings)
+ nvs_reset_clocks(dev_id);
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -ENODEV;
@@ -472,7 +427,6 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
return ret;
}
-
/**
* Set power state of a device (9xx)
* Code is similar as clocks one, which allow the change of the pstate
@@ -493,12 +447,7 @@ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
return -EPERM;
}
- rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetDefaultApplicationsClock: %s", dev_id, nvmlh->nvmlErrorString(rc));
- return -1;
- }
+ nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
if (rc != NVML_SUCCESS) {
applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
@@ -509,60 +458,155 @@ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
- // these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
- uint32_t clocks[MAXCLOCKS] = {0};
- uint32_t nclocks = MAXCLOCKS;
+ // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+
+ uint32_t nclocks = 0, mem_clocks[32] = { 0 };
int8_t wanted_pstate = device_pstate[dev_id];
- rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetSupportedMemoryClocks: %s", dev_id, nvmlh->nvmlErrorString(rc));
- return -1;
- }
- if(wanted_pstate < 0)
- return -1;
- if(wanted_pstate < nclocks)
- {
- mem_clk = clocks[wanted_pstate];
+ nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
+ nclocks = min(nclocks, 32);
+ if (nclocks)
+ nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks);
+ if ((uint32_t) wanted_pstate+1 > nclocks) {
+ applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks);
}
- else
- {
- applog(LOG_WARNING, "GPU #%d: pstate %d is unsupported");
- return -1;
+ for (uint8_t u=0; u < nclocks; u++) {
+ // ordered by pstate (so highest P0 first)
+ if (u == wanted_pstate) {
+ mem_clk = mem_clocks[u];
+ break;
+ }
}
- nclocks = MAXCLOCKS;
- rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
- if(rc != NVML_SUCCESS)
- {
- applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetSupportedGraphicsClocks: %s", dev_id, nvmlh->nvmlErrorString(rc));
- return -1;
- }
- if(device_gpu_clocks[dev_id] == 0)
- gpu_clk = 9999;
- for(uint8_t u = 0; u < nclocks; u++)
- {
- // ordered desc, so get first
- if(clocks[u] <= gpu_clk)
- {
- gpu_clk = clocks[u];
- break;
+ uint32_t* gpu_clocks = NULL;
+ nclocks = 0;
+ nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
+ if (nclocks) {
+ gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4);
+ rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks);
+ if (rc == NVML_SUCCESS) {
+ // ordered desc, get the max app clock (do not limit)
+ gpu_clk = gpu_clocks[0];
}
+ free(gpu_clocks);
}
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
if (rc != NVML_SUCCESS) {
- applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc));
+ applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate,
+ mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
return -1;
}
if (!opt_quiet)
- applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int)wanted_pstate, mem_clk, gpu_clk);
+ applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);
clock_prev[dev_id] = 1;
return 1;
}
+int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
+{
+ nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
+ uint32_t gpu_clk = 0, mem_clk = 0;
+ int n = nvmlh->cuda_nvml_device_id[dev_id];
+ if (n < 0 || n >= nvmlh->nvml_gpucount)
+ return -ENODEV;
+
+ if (!device_plimit[dev_id])
+ return 0; // nothing to do
+
+ if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
+ return -ENOSYS;
+
+ uint32_t plimit = device_plimit[dev_id] * 1000;
+ uint32_t pmin = 1000, pmax = 0, prev_limit = 0;
+ if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
+ rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
+
+ if (rc != NVML_SUCCESS) {
+ if (!nvmlh->nvmlDeviceGetPowerManagementLimit)
+ return -ENOSYS;
+ }
+	if (nvmlh->nvmlDeviceGetPowerManagementLimit) nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit);
+ if (!pmax) pmax = prev_limit;
+
+ plimit = min(plimit, pmax);
+ plimit = max(plimit, pmin);
+ rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
+ if (rc != NVML_SUCCESS) {
+#ifndef WIN32
+ applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
+#endif
+ return -1;
+ } else {
+ device_plimit[dev_id] = plimit / 1000;
+ nvml_plimit_set = true;
+ }
+
+ if (!opt_quiet) {
+ applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
+ dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
+ }
+
+ limit_prev[dev_id] = prev_limit;
+ return 1;
+}
+
+uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id)
+{
+ uint32_t plimit = 0;
+ int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1;
+ if (n < 0 || n >= nvmlh->nvml_gpucount)
+ return 0;
+
+ if (nvmlh->nvmlDeviceGetPowerManagementLimit) {
+ nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit);
+ }
+ return plimit;
+}
+
+// ccminer -D -n
+#define LSTDEV_PFX " "
+void nvml_print_device_info(int dev_id)
+{
+ if (!hnvml) return;
+
+ int n = hnvml->cuda_nvml_device_id[dev_id];
+ if (n < 0 || n >= hnvml->nvml_gpucount)
+ return;
+
+ nvmlReturn_t rc;
+
+ // fprintf(stderr, "------ Hardware ------\n");
+ int gvid = hnvml->nvml_pci_vendor_id[n] & 0xFFFF;
+ int gpid = hnvml->nvml_pci_vendor_id[n] >> 16;
+ int svid = hnvml->nvml_pci_subsys_id[n] & 0xFFFF;
+ int spid = hnvml->nvml_pci_subsys_id[n] >> 16;
+
+ fprintf(stderr, LSTDEV_PFX "ID %04x:%04x/%04x:%04x BUS %04x:%02x:%02x.0\n", gvid, gpid, svid, spid,
+ (int) hnvml->nvml_pci_domain_id[n], (int) hnvml->nvml_pci_bus_id[n], (int) hnvml->nvml_pci_device_id[n]);
+
+ if (hnvml->nvmlDeviceGetClock) {
+ uint32_t gpu_clk = 0, mem_clk = 0;
+
+ // fprintf(stderr, "------- Clocks -------\n");
+
+ hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk);
+ rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk);
+ if (rc == NVML_SUCCESS) {
+ fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
+ }
+ hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk);
+ rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk);
+ if (rc == NVML_SUCCESS) {
+ fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
+ }
+ hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk);
+ rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk);
+ if (rc == NVML_SUCCESS) {
+ fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
+ }
+ }
+}
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
@@ -576,6 +620,7 @@ int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount)
return 0;
}
+
int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
@@ -629,6 +674,22 @@ int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
return 0;
}
+
+int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigned int *mem_clock)
+{
+ nvmlReturn_t rc;
+ int gpuindex = hnvml->cuda_nvml_device_id[cudaindex];
+ if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV;
+ if (!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS;
+
+ rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock);
+ if (rc != NVML_SUCCESS) return -1;
+ rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock);
+ if (rc != NVML_SUCCESS) return -1;
+
+ return 0;
+}
+
/* Not Supported on 750Ti 340.23 */
int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
@@ -739,9 +800,11 @@ int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pi
return -ENODEV;
subids = nvmlh->nvml_pci_subsys_id[gpuindex];
- if (!subids) subids = nvmlh->nvml_pci_device_id[gpuindex];
+ if (!subids) subids = nvmlh->nvml_pci_vendor_id[gpuindex];
pid = subids >> 16;
vid = subids & 0xFFFF;
+ // Colorful and Inno3D
+ if (pid == 0) pid = nvmlh->nvml_pci_vendor_id[gpuindex] >> 16;
return 0;
}
@@ -754,6 +817,7 @@ int nvml_destroy(nvml_handle *nvmlh)
free(nvmlh->nvml_pci_bus_id);
free(nvmlh->nvml_pci_device_id);
free(nvmlh->nvml_pci_domain_id);
+ free(nvmlh->nvml_pci_vendor_id);
free(nvmlh->nvml_pci_subsys_id);
free(nvmlh->nvml_cuda_device_id);
free(nvmlh->cuda_nvml_device_id);
@@ -764,6 +828,8 @@ int nvml_destroy(nvml_handle *nvmlh)
return 0;
}
+// ----------------------------------------------------------------------------
+
/**
* nvapi alternative for windows x86 binaries
* nvml api doesn't exists as 32bit dll :///
@@ -771,8 +837,11 @@ int nvml_destroy(nvml_handle *nvmlh)
#ifdef WIN32
#include "nvapi/nvapi_ccminer.h"
+static unsigned int nvapi_dev_map[MAX_GPUS] = { 0 };
static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
+static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
static NvU32 nvapi_dev_cnt = 0;
+extern bool nvapi_dll_loaded;
int nvapi_temperature(unsigned int devNum, unsigned int *temperature)
{
@@ -819,7 +888,7 @@ int nvapi_fanspeed(unsigned int devNum, unsigned int *speed)
return 0;
}
-int nvapi_getpstate(unsigned int devNum, unsigned int *power)
+int nvapi_getpstate(unsigned int devNum, unsigned int *pstate)
{
NvAPI_Status ret;
@@ -837,7 +906,7 @@ int nvapi_getpstate(unsigned int devNum, unsigned int *power)
}
else {
// get pstate for the moment... often 0 = P0
- (*power) = (unsigned int)CurrentPstate;
+ (*pstate) = (unsigned int)CurrentPstate;
}
return 0;
@@ -891,6 +960,8 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid)
if (vid == 0x10DE && pSubSystemId) {
vid = pSubSystemId & 0xFFFF;
pid = pSubSystemId >> 16;
+ // Colorful and Inno3D
+ if (pid == 0) pid = pDeviceId >> 16;
}
return 0;
@@ -898,25 +969,27 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid)
int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen)
{
-// NvAPI_Status ret;
+ NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
- sprintf(serial, "");
+ memset(serial, 0, maxlen);
- if (maxlen < 64) // Short String
- return -1;
+ if (maxlen < 11)
+ return -EINVAL;
-#if 0
- ret = NvAPI_GPU_Get..(phys[devNum], serial);
+ NvAPI_ShortString ser = { 0 };
+ ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
- applog(LOG_DEBUG, "NVAPI ...: %s", string);
+ applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string);
return -1;
}
-#endif
+
+ uint8_t *bytes = (uint8_t*) ser;
+ for (int n=0; n<5; n++) sprintf(&serial[n*2], "%02X", bytes[n]);
return 0;
}
@@ -939,20 +1012,565 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
}
return 0;
}
+
+// ASUS Strix: drive the RGB logo controller over I2C (port 1, device 0x52).
+// Registers: 4 = R, 5 = G, 6 = B, 7 = mode (1 static, 2 breath, 3 blink, 4 demo).
+// Note the channel bytes come from a packed uchar4: rgb.z = R, rgb.y = G, rgb.x = B.
+// Only channels that changed since the previous call (device_led_state) are
+// rewritten, unless ignorePrevState forces a full refresh. Each write is
+// followed by a 20 ms sleep, apparently to let the controller settle.
+static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevState)
+{
+	NvAPI_Status ret = NVAPI_OK;
+	NV_I2C_INFO_EX* i2cInfo;
+
+	int delay1 = 20000;
+	int delay2 = 0;
+
+	uchar4 rgb = { 0 };
+	memcpy(&rgb, &RGB, 4);
+	uchar4 prgb = { 0 };
+	int32_t prev = device_led_state[nvapi_devid(devNum)];
+	memcpy(&prgb, &prev, 4);
+
+	NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo);
+	if (i2cInfo == NULL) return -ENOMEM;
+
+	NvU32 data[5] = { 0 };
+	NvU32 datv[2] = { 0, 1 };
+	NvU32 datw[2] = { 1, 0 };
+	// red channel (register 4)
+	if (rgb.z != prgb.z || ignorePrevState) {
+		data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo)
+		data[3] = 1;
+		datv[0] = rgb.z | 0x13384000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = (NvU8*) datv;
+		i2cInfo->cbRead = 5;
+		i2cInfo->cbSize = 1;
+		i2cInfo->portId = 1;
+		i2cInfo->bIsPortIdSet = 1;
+
+		ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
+		usleep(delay1);
+		has_rgb_ok = (ret == NVAPI_OK);
+	}
+
+	// green channel (register 5)
+	if (rgb.y != prgb.y || ignorePrevState) {
+		data[2] = 5;
+		data[3] = 1;
+		datv[0] = rgb.y | 0x4000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = (NvU8*) datv;
+		i2cInfo->cbRead = 5;
+		i2cInfo->cbSize = 1;
+		i2cInfo->portId = 1;
+		i2cInfo->bIsPortIdSet = 1;
+
+		ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
+		usleep(delay1);
+		has_rgb_ok = (ret == NVAPI_OK);
+	}
+
+	// blue channel (register 6)
+	// fix: was "rgb.y != prgb.y" (copy-paste), which skipped blue-only changes
+	if (rgb.x != prgb.x || ignorePrevState) {
+		data[2] = 6;
+		data[3] = 1;
+		datv[0] = rgb.x | 0x4000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = (NvU8*) datv;
+		i2cInfo->cbRead = 5;
+		i2cInfo->cbSize = 1;
+		i2cInfo->portId = 1;
+		i2cInfo->bIsPortIdSet = 1;
+
+		ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
+		usleep(delay1);
+		has_rgb_ok = (ret == NVAPI_OK);
+	}
+
+	// mode byte (register 7) — only written on a forced refresh
+	if (rgb.w && ignorePrevState) {
+		data[2] = 7;
+		data[3] = 1;
+		datv[0] = rgb.w | 0x4000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = (NvU8*) datv;
+		i2cInfo->cbRead = 5;
+		i2cInfo->cbSize = 1;
+		i2cInfo->portId = 1;
+		i2cInfo->bIsPortIdSet = 1;
+
+		ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
+		usleep(delay1);
+		has_rgb_ok = (ret == NVAPI_OK);
+	}
+	usleep(delay2);
+	free(i2cInfo);
+	return (int) ret;
+}
+
+// Gigabyte GTX 10x0: set the RGB logo color via an I2C transaction on port 1,
+// device 0x48. The byte-swapped color (masked to 0xFCFCFC, OR 0x40) is passed
+// as the "register address"; the actual transfer issued is a read (the write
+// variant is commented out) — presumably the controller latches the color from
+// the address phase. TODO(review): confirm on hardware.
+static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB)
+{
+	NvAPI_Status ret;
+	NV_I2C_INFO_EX* i2cInfo;
+	NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo);
+	if (i2cInfo == NULL)
+		return -ENOMEM;
+
+	NvU32 readBuf[25] = { 0 };
+	NvU32 data[5] = { 0 };
+	data[0] = 1;
+	data[2] = swab32(RGB & 0xfcfcfcU) | 0x40;
+
+	i2cInfo->i2cDevAddress = 0x48 << 1;
+	i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+	i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS
+	i2cInfo->pbData = (NvU8*) readBuf;
+	i2cInfo->cbRead = 2;
+	i2cInfo->cbSize = sizeof(readBuf);
+	i2cInfo->portId = 1;
+	i2cInfo->bIsPortIdSet = 1;
+
+	//ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, data);
+	ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data);
+	// 20 ms pause after the transaction (same spacing as the other led helpers)
+	usleep(20000);
+	free(i2cInfo);
+	return (int) ret;
+}
+
+// Zotac GTX 10x0: set the logo led. The controller does not take an arbitrary
+// RGB value — only a small palette plus a brightness level — so the requested
+// color is mapped to the nearest palette entry and the high nibbles of the
+// channels become the level. Uses the plain NvAPI_I2CWrite path (DDC port).
+static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB)
+{
+	NvAPI_Status ret;
+	NV_I2C_INFO* i2cInfo;
+	NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo);
+	if (i2cInfo == NULL)
+		return -ENOMEM;
+
+	NvU32 buf[25] = { 0 };
+	NvU32 data[5] = { 0 };
+
+	uint32_t color = 0, level = 0x40;
+
+	uchar4 rgb = { 0 };
+	memcpy(&rgb, &RGB, 4);
+	// brightness: OR of the channels' high nibbles (0x10..0xF0)
+	level = rgb.x & 0xF0;
+	level |= rgb.y & 0xF0;
+	level |= rgb.z & 0xF0;
+	//applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x);
+
+	// Not really RGB custom, only some basic colors, so convert
+	// 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white
+	if ((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6;
+	else if ((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5;
+	else if ((RGB & 0xFF00) && (RGB & 0xFF)) color = 3;
+	else if ((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1;
+	else if (RGB & 0xFF) color = 4;
+	else if (RGB & 0xFF00) color = 2;
+
+	// command word layout: [cmd | color<<8 | zone<<16 | level<<24]
+	buf[0] = 0xF0; // F0 set colors
+	buf[0] |= (color << 8); // logo
+	buf[0] |= (1 << 16); // top
+	if (RGB != 0) // level : 0x10 to 0xF0
+		buf[0] |= (level << 24);
+	else
+		buf[0] |= (0x10U << 24); // RGB==0 -> minimum brightness
+
+	// todo: i2c data crc ?
+
+	i2cInfo->displayMask = 1;
+	i2cInfo->bIsDDCPort = 1;
+	i2cInfo->i2cDevAddress = 0x48 << 1;
+	i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+	i2cInfo->regAddrSize = 1;
+	i2cInfo->pbData = (NvU8*) buf;
+	i2cInfo->cbSize = 4;
+	i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED;
+	i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4
+	i2cInfo->portId = 1;
+	i2cInfo->bIsPortIdSet = 1;
+
+	ret = NvAPI_I2CWrite(phys[devNum], i2cInfo);
+	// required to prevent i2c lock
+	usleep(20000);
+
+#if 0
+	buf[0] = 0xF7; // F7 toggle leds
+	if (RGB == 0)
+		buf[0] |= (1 << 8); // 0 logo on, 1 off
+	buf[0] |= (1 << 16); // 1 top off
+	ret = NvAPI_I2CWrite(phys[devNum], i2cInfo);
+	usleep(20000);
+#endif
+	// other modes:
+	// 0xF1 breathing green (0x070202F1)
+	// 0xF2 strobe green (0x070202F2)
+	// 0xF3 cycle (0x000000F3)
+
+	free(i2cInfo);
+	return (int) ret;
+}
+
+// Set the card's logo led color/brightness. Dispatches on the marketing name:
+// Gigabyte/ASUS/Zotac GTX 10x0 cards get their vendor-specific I2C protocol,
+// everything else falls back to the documented NvAPI illumination API
+// (brightness only, when the GPU reports NV_GPU_IA_LOGO_BRIGHTNESS support).
+// Returns 0/NVAPI_OK on success, a vendor-helper or NvAPI error code otherwise.
+int nvapi_set_led(unsigned int devNum, int RGB, char *device_name)
+{
+	uint16_t vid = 0, pid = 0;
+	NvAPI_Status ret;
+	if (strstr(device_name, "Gigabyte GTX 10")) {
+		if (opt_debug)
+			applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB);
+		return SetGigabyteRGBLogo(devNum, (uint32_t) RGB);
+	} else if (strstr(device_name, "ASUS GTX 10")) {
+		if (opt_debug)
+			applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB);
+		// force a full channel refresh until the first write has succeeded
+		return SetAsusRGBLogo(devNum, (uint32_t) RGB, !has_rgb_ok);
+	} else if (strstr(device_name, "Zotac GTX 10")) {
+		if (opt_debug)
+			applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB);
+		return SetZotacRGBLogo(devNum, (uint32_t) RGB);
+	} else {
+		// generic path: plain brightness via the illumination API
+		NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu;
+		NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu);
+		illu->hPhysicalGpu = phys[devNum];
+		illu->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS;
+		ret = NvAPI_GPU_QueryIlluminationSupport(illu);
+		if (!ret && illu->bSupported) {
+			NV_GPU_GET_ILLUMINATION_PARM *led;
+			NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led);
+			led->hPhysicalGpu = phys[devNum];
+			led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS;
+			NvAPI_GPU_GetIllumination(led);
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int) phys[devNum], led->Value, RGB);
+			led->Value = (uint32_t) RGB;
+			// GET and SET parameter structs share the same layout here
+			ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*) led);
+			free(led);
+		}
+		free(illu);
+		return ret;
+	}
+}
+
+// Dump a detailed report of the GPU's performance states to the log:
+// P-state clocks/voltages (with editable markers and OC deltas), base/boost/
+// current clocks, perf clock sets, voltage status, power and thermal limits,
+// boost/voltage-frequency tables (Pascal) or the voltage table (Maxwell),
+// and memory usage. Diagnostic only; returns 0 on success, -1/-ENOMEM on error.
+int nvapi_pstateinfo(unsigned int devNum)
+{
+	uint32_t n;
+	NvAPI_Status ret;
+	// 16 KB scratch buffer reused by the NV_INIT_STRUCT_ON() queries below
+	uint32_t* mem = (uint32_t*) calloc(1, 0x4000);
+	if (!mem)
+		return -ENOMEM;
+
+	unsigned int current = 0xFF;
+	// useless on init but...
+	nvapi_getpstate(devNum, &current); // fix: was mojibake "¤t" (mangled "&current")
+
+#if 0
+	// try :p
+	uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
+	for (int i=8; i < 0x8000 && buf; i+=4) {
+		buf[0] = 0x10000 + i;
+		NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0;
+		ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf);
+		if (ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
+			for (int n=0; n < i/32; n++)
+				applog_hex(&buf[n*(32/4)], 32);
+			break;
+		}
+	}
+	free(buf);
+#endif
+
+#if 0
+	// Unsure of the meaning of these values
+	NVAPI_GPU_POWER_TOPO topo = { 0 };
+	topo.version = NVAPI_GPU_POWER_TOPO_VER;
+	if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) {
+		if (topo.count)
+			applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?",
+				(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000);
+
+	// Ok on 970, not pascal
+	NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 };
+	pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2;
+	pset2.ov.numVoltages = 1;
+	pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv;
+	ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2);
+#endif
+
+	NV_GPU_PERF_PSTATES20_INFO* info;
+	NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem);
+	if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) {
+		NvAPI_ShortString string;
+		NvAPI_GetErrorMessage(ret, string);
+		if (opt_debug)
+			applog(LOG_RAW, "NVAPI GetPstates20: %s", string);
+		free(mem); // fix: scratch buffer leaked on this early-exit path
+		return -1;
+	}
+
+	// per-pstate mem/gpu clocks and base voltage; '*' marks editable fields,
+	// '>' marks the current pstate
+	for (n=0; n < info->numPstates; n++) {
+		NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks;
+		applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d",
+			info->pstates[n].pstateId == current ? ">":" ", (int) info->pstates[n].pstateId,
+			clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ",
+			(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ",
+			info->pstates[n].baseVoltages[0].volt_uV/1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*": " ",
+			info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
+			info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
+		if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
+			applog(LOG_RAW, "      OC %+4d MHz %+6.1f MHz",
+				clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
+		}
+	}
+	// boost over volting (GTX 9xx only ?)
+	for (n=0; n < info->ov.numVoltages; n++) {
+		applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d",
+			info->ov.voltages[n].volt_uV/1000, info->ov.voltages[n].voltDelta_uV.value/1000, info->ov.voltages[n].bIsEditable ? "*":" ",
+			info->ov.voltages[n].voltDelta_uV.valueRange.min/1000, info->ov.voltages[n].voltDelta_uV.valueRange.max/1000);
+	}
+
+	NV_GPU_CLOCK_FREQUENCIES *freqs;
+	NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem);
+	freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
+	applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks",
+		(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
+		(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
+
+	freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
+	applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks",
+		(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
+		(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
+
+	freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
+	applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current",
+		(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
+		(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
+
+	// Other clock values ?? (undocumented: enumerate until the call errors out)
+	NVAPI_GPU_PERF_CLOCKS *pcl;
+	NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl);
+	int numClock=0; ret = NVAPI_OK;
+	while (ret == NVAPI_OK) {
+		if ((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) {
+			applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock,
+				(double) pcl->memFreq1/1000, (double) pcl->gpuFreq1/1000, (double) pcl->gpuFreqMin/1000, (double) pcl->gpuFreqMax/1000);
+			// ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error
+		}
+		numClock++;
+	}
+
+	// Pascal only
+	NVAPI_VOLTBOOST_PERCENT *pvb;
+	NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem);
+	if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) {
+		NVAPI_VOLTAGE_STATUS *pvdom;
+		NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom);
+		NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom);
+		if (pvdom && pvdom->value_uV)
+			applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV/1000, pvb->percent);
+		else if (pvdom)
+			applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV/1000);
+		free(pvdom);
+	} else {
+		// Maxwell 9xx
+		NVAPI_VOLT_STATUS *mvdom, *mvstep;
+		NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom);
+		if (mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) {
+			NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep);
+			NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep);
+			if (mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution",
+				(double) mvdom->value_uV/1000, (double) mvstep->value_uV/1000);
+			free(mvstep);
+		}
+		free(mvdom);
+	}
+
+	uint32_t plim = nvapi_get_plimit(devNum);
+	double min_pw = 0, max_pw = 0; // percent
+
+	NVAPI_GPU_POWER_INFO nfo = { 0 };
+	nfo.version = NVAPI_GPU_POWER_INFO_VER;
+	ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo);
+	if (ret == NVAPI_OK && nfo.valid) {
+		min_pw = (double)nfo.entries[0].min_power / 1000;
+		max_pw = (double)nfo.entries[0].max_power / 1000;
+	}
+	applog(LOG_RAW, " Power limit is set to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw);
+
+#if 0
+	NVAPI_COOLER_SETTINGS *cooler;
+	NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem);
+	ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler);
+	if (ret == NVAPI_OK) {
+		applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?)
+		NVAPI_COOLER_LEVEL *fan;
+		NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan);
+		fan->level = 100;
+		fan->count = 1;
+		ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan);
+		free(fan);
+		sleep(10);
+		ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7);
+	}
+#endif
+
+	NV_GPU_THERMAL_SETTINGS *tset;
+	NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem);
+
+	NVAPI_GPU_THERMAL_INFO *tnfo;
+	NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo);
+	NVAPI_GPU_THERMAL_LIMIT *tlim;
+	NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim);
+	NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset);
+	NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo);
+	if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) {
+		// limits/temps are fixed-point with 8 fractional bits, hence the >> 8
+		applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]",
+			tlim->entries[0].value >> 8, tset->sensor[0].currentTemp,
+			tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8);
+	}
+	free(tnfo);
+	free(tlim);
+
+#if 1
+	// Read pascal Clocks Table, Empty on 9xx
+	//NVAPI_CLOCKS_RANGE* ranges;
+	//NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem);
+	//ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges);
+
+	NVAPI_CLOCK_MASKS* boost;
+	NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem);
+	ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost);
+	int gpuClocks = 0, memClocks = 0;
+	for (n=0; n < 80+23; n++) {
+		if (boost->clocks[n].memDelta) memClocks++;
+		if (boost->clocks[n].gpuDelta) gpuClocks++;
+	}
+
+	// PASCAL GTX ONLY
+	if (gpuClocks || memClocks) {
+		NVAPI_CLOCK_TABLE *table;
+		NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table);
+		memcpy(table->mask, boost->mask, 12);
+		ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table);
+		gpuClocks = 0, memClocks = 0;
+		for (n=0; n < 12; n++) {
+			if (table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]);
+		}
+		for (n=0; n < 80; n++) {
+			if (table->gpuDeltas[n].freqDelta) {
+				// note: gpu delta value seems to be x2, not the memory
+				//applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000);
+				gpuClocks++;
+			}
+		}
+		for (n=0; n < 23; n++) {
+			if (table->memFilled[n]) {
+				//applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000);
+				memClocks++;
+			}
+		}
+		for (n=0; n < 1529; n++) {
+			if (table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]);
+		}
+		applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
+		free(table);
+
+		NVAPI_VFP_CURVE *curve;
+		NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve);
+		memcpy(curve->mask, boost->mask, 12);
+		ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve);
+		gpuClocks = 0, memClocks = 0;
+		for (n=0; n < 80; n++) {
+			if (curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) {
+				// applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000);
+				gpuClocks++;
+			}
+		}
+		for (n=0; n < 23; n++) {
+			if (curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) {
+				// applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000);
+				memClocks++;
+			}
+		}
+		for (n=0; n < 1064; n++) {
+			if (curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]);
+		}
+		applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
+		free(curve);
+	}
+
+	// Maxwell
+	else {
+		NVAPI_VOLTAGES_TABLE* volts;
+		NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts);
+		int entries = 0;
+		ret = NvAPI_DLL_GetVoltages(phys[devNum], volts);
+		for (n=0; n < 128; n++) {
+			if (volts->entries[n].volt_uV)
+				entries++;
+		}
+		applog(LOG_RAW, " Volts table contains %d gpu levels.", entries);
+		free(volts);
+	}
+
+	NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo;
+	NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem);
+	meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
+	if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) {
+		applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory/1024,
+			(double) (meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory)/1024);
+	}
+#if 0 /* some undetermined stats */
+	NVAPI_GPU_PERF_INFO pi = { 0 };
+	pi.version = NVAPI_GPU_PERF_INFO_VER;
+	ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi);
+
+	NVAPI_GPU_PERF_STATUS ps = { 0 };
+	ps.version = NVAPI_GPU_PERF_STATUS_VER;
+	ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps);
+	applog(LOG_BLUE, "%llx %lld. %lld. %llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]);
+#endif
+
+#endif
+	free(mem);
+	return 0;
+}
+
+// workaround for buggy driver 378.49
+// Query the current graphics clock via NvAPI_GPU_GetAllClockFrequencies.
+// Returns the frequency in MHz, or 0 when the query fails.
+unsigned int nvapi_get_gpu_clock(unsigned int devNum)
+{
+	NvAPI_Status ret = NVAPI_OK;
+	unsigned int freq = 0;
+	NV_GPU_CLOCK_FREQUENCIES *freqs;
+	NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs);
+	freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
+	if (ret == NVAPI_OK) {
+		// driver reports kHz
+		freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000;
+	}
+	free(freqs);
+	return freq; // in MHz
+}
+
uint8_t nvapi_get_plimit(unsigned int devNum)
{
NvAPI_Status ret = NVAPI_OK;
- NVAPI_GPU_POWER_STATUS pol = {0};
+ NVAPI_GPU_POWER_STATUS pol = { 0 };
pol.version = NVAPI_GPU_POWER_STATUS_VER;
- if((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK)
- {
+ if ((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
- if(opt_debug)
+ if (opt_debug)
applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string);
return 0;
}
- return (uint8_t)(pol.entries[0].power / 1000); // in percent
+ return (uint8_t) (pol.entries[0].power / 1000); // in percent
}
int nvapi_set_plimit(unsigned int devNum, uint16_t percent)
@@ -960,39 +1578,208 @@ int nvapi_set_plimit(unsigned int devNum, uint16_t percent)
NvAPI_Status ret = NVAPI_OK;
uint32_t val = percent * 1000;
- NVAPI_GPU_POWER_INFO nfo = {0};
+ NVAPI_GPU_POWER_INFO nfo = { 0 };
nfo.version = NVAPI_GPU_POWER_INFO_VER;
ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo);
- if(ret == NVAPI_OK)
- {
- if(val == 0)
+ if (ret == NVAPI_OK) {
+ if (val == 0)
val = nfo.entries[0].def_power;
- else if(val < nfo.entries[0].min_power)
+ else if (val < nfo.entries[0].min_power)
val = nfo.entries[0].min_power;
- else if(val > nfo.entries[0].max_power)
+ else if (val > nfo.entries[0].max_power)
val = nfo.entries[0].max_power;
}
- NVAPI_GPU_POWER_STATUS pol = {0};
+ NVAPI_GPU_POWER_STATUS pol = { 0 };
pol.version = NVAPI_GPU_POWER_STATUS_VER;
pol.flags = 1;
pol.entries[0].power = val;
- if((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK)
- {
+ if ((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
- if(opt_debug)
+ if (opt_debug)
applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string);
return -1;
}
return ret;
}
+// Set the GPU thermal (temperature) limit in degrees C via the undocumented
+// client thermal-policies API. Reads the current limit first, then writes the
+// new value; limits are fixed-point with 8 fractional bits (hence << 8 / >> 8).
+// Returns NVAPI_OK on success, an NvAPI error or -ENODEV otherwise.
+int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
+{
+	NvAPI_Status ret;
+	uint32_t val = limit;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	NV_GPU_THERMAL_SETTINGS tset = { 0 };
+	NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
+	NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
+	tset.version = NV_GPU_THERMAL_SETTINGS_VER;
+	NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset);
+	tnfo.version = NVAPI_GPU_THERMAL_INFO_VER;
+	NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo);
+	tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER;
+	if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) {
+		tlim.entries[0].value = val << 8;
+		tlim.flags = 1;
+		ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
+		if (ret == NVAPI_OK) {
+			applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]",
+				devNum, val, tset.sensor[0].currentTemp,
+				tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
+		} else {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string,
+				tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
+		}
+	}
+	return (int) ret;
+}
+
+// Set a target GPU core clock (MHz) by applying a Pstates20 frequency delta
+// relative to the card's stock clock. The stock clock is taken from the CUDA
+// device properties of the device matching this GPU's PCI bus id (the NvAPI
+// base-clock queries return wrong values once the clock has been modified).
+// Returns NVAPI_OK on success, an NvAPI error or -ENODEV otherwise.
+int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
+{
+	NvAPI_Status ret;
+	NvS32 delta = 0;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+#if 0
+	// wrong api to get default base clock when modified, cuda props seems fine
+	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
+	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
+	if (ret == NVAPI_OK) {
+		delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
+	}
+
+	NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
+	deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
+	ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr!
+	if (ret == NVAPI_OK) {
+		if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS)
+			delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2;
+	}
+#endif
+
+	cudaDeviceProp props = { 0 };
+	NvU32 busId = 0xFFFF;
+	ret = NvAPI_GPU_GetBusId(phys[devNum], &busId);
+	for (int d=0; d < (int) nvapi_dev_cnt; d++) {
+		// unsure about devNum, so be safe
+		cudaGetDeviceProperties(&props, d);
+		if (props.pciBusID == busId) {
+			// cuda clockRate is in kHz
+			delta = (clock * 1000) - props.clockRate;
+			break;
+		}
+	}
+
+	// delta == clock*1000 means props.clockRate was 0 (no matching cuda
+	// device found) — bail out rather than apply a bogus offset
+	if (delta == (clock * 1000))
+		return ret;
+
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	// Ok on both 1080 and 970
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000);
+	}
+	return ret;
+}
+
+// Set a target memory clock (MHz) by applying a Pstates20 frequency delta
+// relative to the default P0 memory clock. The default is taken from the
+// deprecated GetPstatesInfoEx call (the only one returning the unmodified
+// value); the GetAllClockFrequencies result is computed first as a fallback.
+// Returns NVAPI_OK on success, an NvAPI error or -ENODEV otherwise.
+int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
+{
+	NvAPI_Status ret;
+	NvS32 delta = 0;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	// wrong to get default base clock (when modified) on maxwell (same as cuda props one)
+	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
+	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless
+	if (ret == NVAPI_OK) {
+		delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
+	}
+
+	// seems ok on maxwell and pascal for the mem clocks
+	NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
+	deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
+	ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks
+	if (ret == NVAPI_OK) {
+		if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
+			delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
+	}
+
+	// delta == clock*1000 means no default clock could be read — bail out
+	if (delta == (clock * 1000))
+		return ret;
+
+	// todo: bounds check with GetPstates20
+
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000);
+	}
+	return ret;
+}
+
+// Apply a raw memory clock offset (delta in MHz, may be negative) via the
+// Pstates20 interface, without computing it against a base clock. On success
+// flags need_memclockrst so the offset is restored to 0 on shutdown.
+// Returns NVAPI_OK on success, an NvAPI error or -ENODEV otherwise.
+static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log=true)
+{
+	NvAPI_Status ret;
+	NvS32 deltaKHz = delta * 1000; // MHz -> kHz
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	// todo: bounds check with GetPstates20
+
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		if (log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000);
+		need_memclockrst = true;
+	}
+	return ret;
+}
+
+// Replacement for WIN32 CUDA 6.5 on pascal
+// Report total/free dedicated video memory (in KB, as NvAPI returns it) for
+// the cuda device dev_id. On failure the output values are left untouched.
+int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
+{
+	NvAPI_Status ret = NVAPI_OK;
+	NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
+	mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
+	unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
+	if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
+		*total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
+		*free = (uint64_t) mem.curAvailableDedicatedVideoMemory;
+	}
+	return (int) ret;
+}
+
int nvapi_init()
{
int num_gpus = cuda_num_devices();
NvAPI_Status ret = NvAPI_Initialize();
- if (!ret == NVAPI_OK){
+ if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
@@ -1038,11 +1825,12 @@ int nvapi_init()
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
}
}
-
#if 0
- NvAPI_ShortString ver;
- NvAPI_GetInterfaceVersionString(ver);
- applog(LOG_DEBUG, "NVAPI Version: %s", ver);
+ if (opt_debug) {
+ NvAPI_ShortString ver;
+ NvAPI_GetInterfaceVersionString(ver);
+ applog(LOG_DEBUG, "%s", ver);
+ }
#endif
NvU32 udv;
@@ -1054,7 +1842,92 @@ int nvapi_init()
return 0;
}
-#endif
+
+// Apply all per-GPU command-line/config tuning through NvAPI: power limit,
+// thermal limit, gpu clock, memory clock or offset, and led color. Runs once
+// at startup after nvapi_dll_init(). Power limit is skipped when NVML already
+// applied it (nvml_plimit_set). Returns the nvapi_dll_init() error when the
+// DLL is unavailable; otherwise the status of the last setting attempted.
+int nvapi_init_settings()
+{
+	// nvapi.dll
+	int ret = nvapi_dll_init();
+	if (ret != NVAPI_OK)
+		return ret;
+
+	if (!opt_n_threads) {
+		opt_n_threads = active_gpus;
+	}
+
+	for (int n=0; n < opt_n_threads; n++) {
+		int dev_id = device_map[n % MAX_GPUS];
+		if (device_plimit[dev_id] && !nvml_plimit_set) {
+			if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) {
+				uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]);
+				gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
+			}
+		}
+		if (device_tlimit[dev_id]) {
+			nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]);
+		}
+		if (device_gpu_clocks[dev_id]) {
+			ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string);
+			}
+		}
+		// a raw memory offset takes precedence over an absolute mem clock
+		if (device_mem_offsets[dev_id]) {
+			ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status)ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string);
+			}
+		}
+		else if (device_mem_clocks[dev_id]) {
+			ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string);
+			}
+		}
+		if (device_pstate[dev_id]) {
+			// dunno how via nvapi or/and pascal
+		}
+		if (device_led[dev_id] != -1) {
+			int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]);
+			if (err != 0) {
+				gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err);
+			}
+			device_led_state[dev_id] = device_led[dev_id];
+		}
+	}
+
+	// NOTE(review): 'ret' holds the status of the last clock call only; an
+	// early failure followed by a success is reported as success
+	return ret;
+}
+
+// Enable (apply the configured offset) or disable (reset to 0) the memory
+// clock offset for the GPU bound to thr_id. No-op when no offset was set.
+void nvapi_toggle_clocks(int thr_id, bool enable)
+{
+	int dev_id = device_map[thr_id % MAX_GPUS];
+	if (device_mem_offsets[dev_id]) {
+		nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false);
+	}
+}
+
+// Map a cuda device id to its NvAPI enumeration index.
+unsigned int nvapi_devnum(int dev_id)
+{
+	return nvapi_dev_map[dev_id];
+}
+
+// Reverse lookup: find the cuda device id mapped to NvAPI device devNum.
+// Falls back to 0 when no mapped thread matches.
+int nvapi_devid(unsigned int devNum)
+{
+	for (int i=0; i < opt_n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		// fix: was '=' (assignment) — it clobbered the map and matched
+		// the first thread whenever devNum was non-zero
+		if (nvapi_dev_map[dev_id] == devNum)
+			return dev_id;
+	}
+	return 0;
+}
+
+#endif /* WIN32 : Windows specific (nvapi) */
/* api functions -------------------------------------- */
@@ -1144,13 +2017,11 @@ unsigned int gpu_power(struct cgpu_info *gpu)
{
unsigned int mw = 0;
int support = -1;
- if(hnvml)
- {
+ if (hnvml) {
support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw);
}
#ifdef WIN32
- if(support == -1)
- {
+ if (support == -1) {
unsigned int pct = 0;
nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct);
pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
@@ -1158,94 +2029,24 @@ unsigned int gpu_power(struct cgpu_info *gpu)
mw = pct; // to fix
}
#endif
- if(gpu->gpu_power > 0)
- {
+ if (gpu->gpu_power > 0) {
// average
mw = (gpu->gpu_power + mw) / 2;
}
return mw;
}
-int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
-{
- nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
- uint32_t gpu_clk = 0, mem_clk = 0;
- int n = nvmlh->cuda_nvml_device_id[dev_id];
- if(n < 0 || n >= nvmlh->nvml_gpucount)
- return -ENODEV;
-
- if(!device_plimit[dev_id])
- return 0; // nothing to do
-
- if(!nvmlh->nvmlDeviceSetPowerManagementLimit)
- return -ENOSYS;
-
- uint32_t plimit = device_plimit[dev_id] * 1000;
- uint32_t pmin = 1000, pmax = 0, prev_limit = 0;
- if(nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
- rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
-
- if(rc != NVML_SUCCESS)
- {
- if(!nvmlh->nvmlDeviceGetPowerManagementLimit)
- return -ENOSYS;
- }
- nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit);
- if(!pmax) pmax = prev_limit;
-
- plimit = min(plimit, pmax);
- plimit = max(plimit, pmin);
- rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
- if(rc != NVML_SUCCESS)
- {
-#ifndef WIN32
- applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
-#endif
- return -1;
- }
- else
- {
- device_plimit[dev_id] = plimit / 1000;
- nvml_plimit_set = true;
- }
-
- if(!opt_quiet)
- {
- applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
- dev_id, plimit / 1000U, pmin / 1000U, pmax / 1000U);
- }
-
- limit_prev[dev_id] = prev_limit;
- return 1;
-}
-
-uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id)
-{
- uint32_t plimit = 0;
- int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1;
- if(n < 0 || n >= nvmlh->nvml_gpucount)
- return 0;
-
- if(nvmlh->nvmlDeviceGetPowerManagementLimit)
- {
- nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit);
- }
- return plimit;
-}
-
unsigned int gpu_plimit(struct cgpu_info *gpu)
{
unsigned int mw = 0;
int support = -1;
- if(hnvml)
- {
+ if (hnvml) {
mw = nvml_get_plimit(hnvml, gpu->gpu_id);
support = (mw > 0);
}
#ifdef WIN32
// NVAPI value is in % (< 100 so)
- if(support == -1)
- {
+ if (support == -1) {
mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
}
#endif
@@ -1259,18 +2060,21 @@ static int translate_vendor_id(uint16_t vid, char *vendorname)
const char *name;
} vendors[] = {
{ 0x1043, "ASUS" },
+ { 0x1048, "Elsa" },
{ 0x107D, "Leadtek" },
{ 0x10B0, "Gainward" },
// { 0x10DE, "NVIDIA" },
{ 0x1458, "Gigabyte" },
{ 0x1462, "MSI" },
- { 0x154B, "PNY" },
+ { 0x154B, "PNY" }, // maybe storage devices
+ { 0x1569, "Palit" },
{ 0x1682, "XFX" },
{ 0x196D, "Club3D" },
+ { 0x196E, "PNY" },
{ 0x19DA, "Zotac" },
{ 0x19F1, "BFG" },
{ 0x1ACC, "PoV" },
- { 0x1B4C, "KFA2" },
+ { 0x1B4C, "Galax" }, // KFA2 in EU, to check on Pascal cards
{ 0x3842, "EVGA" },
{ 0x7377, "Colorful" },
{ 0, "" }
@@ -1290,52 +2094,8 @@ static int translate_vendor_id(uint16_t vid, char *vendorname)
return 0;
}
-#ifdef HAVE_PCIDEV
-extern "C" {
-#include <pci/pci.h>
-}
-static int linux_gpu_vendor(uint8_t pci_bus_id, char* vendorname, uint16_t &pid)
-{
- uint16_t subvendor = 0;
- struct pci_access *pci;
- struct pci_dev *dev;
- uint16_t subdevice;
-
- if (!vendorname)
- return -EINVAL;
-
- pci = pci_alloc();
- if (!pci)
- return -ENODEV;
-
- pci_init(pci);
- pci_scan_bus(pci);
-
- for(dev = pci->devices; dev; dev = dev->next)
- {
- if (dev->bus == pci_bus_id && dev->vendor_id == 0x10DE)
- {
- if (!(dev->known_fields & PCI_FILL_CLASS))
- pci_fill_info(dev, PCI_FILL_CLASS);
- if (dev->device_class != PCI_CLASS_DISPLAY_VGA)
- continue;
- subvendor = pci_read_word(dev, PCI_SUBSYSTEM_VENDOR_ID);
- subdevice = pci_read_word(dev, PCI_SUBSYSTEM_ID); // model
-
- translate_vendor_id(subvendor, vendorname);
- }
- }
- pci_cleanup(pci);
- return (int) subvendor;
-}
-#endif
-
int gpu_vendor(uint8_t pci_bus_id, char *vendorname)
{
-#ifdef HAVE_PCIDEV
- uint16_t pid = 0;
- return linux_gpu_vendor(pci_bus_id, vendorname, pid);
-#else
uint16_t vid = 0, pid = 0;
if (hnvml) { // may not be initialized on start...
for (int id=0; id < hnvml->nvml_gpucount; id++) {
@@ -1355,7 +2115,6 @@ int gpu_vendor(uint8_t pci_bus_id, char *vendorname)
#endif
}
return translate_vendor_id(vid, vendorname);
-#endif
}
int gpu_info(struct cgpu_info *gpu)
@@ -1372,13 +2131,7 @@ int gpu_info(struct cgpu_info *gpu)
if (hnvml) {
gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[id];
-#ifdef HAVE_PCIDEV
- gpu->gpu_vid = linux_gpu_vendor(hnvml->nvml_pci_bus_id[id], vendorname, gpu->gpu_pid);
- if (!gpu->gpu_vid || !gpu->gpu_pid)
- nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid);
-#else
nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid);
-#endif
nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn));
nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc));
}
@@ -1392,3 +2145,132 @@ int gpu_info(struct cgpu_info *gpu)
}
#endif /* USE_WRAPNVML */
+
+static int rgb_percent(int RGB, int percent)
+{
+ uint8_t* comp = (uint8_t*) &RGB;
+ int res = ((percent*comp[2]) / 100) << 16;
+ res += ((percent*comp[1]) / 100) << 8;
+ return res + ((percent*comp[0]) / 100);
+}
+
+void gpu_led_on(int dev_id)
+{
+#if defined(WIN32) && defined(USE_WRAPNVML)
+ int value = device_led[dev_id];
+ if (device_led_state[dev_id] != value) {
+ if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0)
+ device_led_state[dev_id] = value;
+ }
+#endif
+}
+
+void gpu_led_percent(int dev_id, int percent)
+{
+#if defined(WIN32) && defined(USE_WRAPNVML)
+ int value = rgb_percent(device_led[dev_id], percent);
+ if (device_led_state[dev_id] != value) {
+ if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0)
+ device_led_state[dev_id] = value;
+ }
+#endif
+}
+
+void gpu_led_off(int dev_id)
+{
+#if defined(WIN32) && defined(USE_WRAPNVML)
+ if (device_led_state[dev_id]) {
+ if (nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0)
+ device_led_state[dev_id] = 0;
+ }
+#endif
+}
+
+#ifdef USE_WRAPNVML
+extern double thr_hashrates[MAX_GPUS];
+extern bool opt_debug_threads;
+extern bool opt_hwmonitor;
+extern int num_cpus;
+
+void *monitor_thread(void *userdata)
+{
+ int thr_id = -1;
+
+ while (!abort_flag && !opt_quiet)
+ {
+ // This thread monitors card's power lazily during scans, one at a time...
+ thr_id = (thr_id + 1) % opt_n_threads;
+ struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
+ int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id);
+
+ if (hnvml != NULL && cgpu)
+ {
+ char khw[32] = { 0 };
+ uint64_t clock = 0, mem_clock = 0;
+ uint32_t fanpercent = 0, power = 0;
+ double tempC = 0, khs_per_watt = 0;
+ uint32_t counter = 0;
+ int max_loops = 1000;
+
+ pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock);
+
+ do {
+ unsigned int tmp_clock=0, tmp_memclock=0;
+ nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock);
+#ifdef WIN32
+ if (tmp_clock < 200) {
+ // workaround for buggy drivers 378.x (real clock)
+ tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]);
+ }
+#endif
+ if (tmp_clock < 200) {
+ // some older cards only report a base clock with cuda props.
+ if (cuda_gpu_info(cgpu) == 0) {
+ tmp_clock = cgpu->gpu_clock/1000;
+ tmp_memclock = cgpu->gpu_memclock/1000;
+ }
+ }
+ clock += tmp_clock;
+ mem_clock += tmp_memclock;
+ tempC += gpu_temp(cgpu);
+ fanpercent += gpu_fanpercent(cgpu);
+ power += gpu_power(cgpu);
+ counter++;
+
+ usleep(50000);
+ if (abort_flag) goto abort;
+
+ } while (cgpu->monitor.sampling_flag && (--max_loops));
+
+ cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter);
+ cgpu->monitor.gpu_fan = fanpercent/counter;
+ cgpu->monitor.gpu_power = power/counter;
+ cgpu->monitor.gpu_clock = (uint32_t) (clock/counter);
+ cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter);
+
+ if (power) {
+ khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]);
+ khs_per_watt = khs_per_watt / ((double)power / counter);
+ format_hashrate(khs_per_watt * 1000, khw);
+ if (strlen(khw))
+ sprintf(&khw[strlen(khw)-1], "W %uW ", cgpu->monitor.gpu_power / 1000);
+ }
+
+ if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) {
+ gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%",
+ cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/,
+ khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan
+ );
+ cgpu->monitor.tm_displayed = (uint32_t)time(NULL);
+ }
+
+ pthread_mutex_unlock(&cgpu->monitor.lock);
+ }
+ usleep(500); // safety
+ }
+abort:
+ if (opt_debug_threads)
+ applog(LOG_DEBUG, "%s() died", __func__);
+ return NULL;
+}
+#endif
diff --git a/nvml.h b/nvml.h
index 71ff20b0..96547f18 100644
--- a/nvml.h
+++ b/nvml.h
@@ -1,27 +1,32 @@
/*
- * A trivial little dlopen()-based wrapper library for the
- * NVIDIA NVML library, to allow runtime discovery of NVML on an
- * arbitrary system. This is all very hackish and simple-minded, but
- * it serves my immediate needs in the short term until NVIDIA provides
- * a static NVML wrapper library themselves, hopefully in
- * CUDA 6.5 or maybe sometime shortly after.
- *
- * This trivial code is made available under the "new" 3-clause BSD license,
- * and/or any of the GPL licenses you prefer.
- * Feel free to use the code and modify as you see fit.
- *
- * John E. Stone - john.stone@gmail.com
- *
- */
+* A trivial little dlopen()-based wrapper library for the
+* NVIDIA NVML library, to allow runtime discovery of NVML on an
+* arbitrary system. This is all very hackish and simple-minded, but
+* it serves my immediate needs in the short term until NVIDIA provides
+* a static NVML wrapper library themselves, hopefully in
+* CUDA 6.5 or maybe sometime shortly after.
+*
+* This trivial code is made available under the "new" 3-clause BSD license,
+* and/or any of the GPL licenses you prefer.
+* Feel free to use the code and modify as you see fit.
+*
+* John E. Stone - john.stone@gmail.com
+*
+*/
#ifdef USE_WRAPNVML
#include "miner.h"
+void *monitor_thread(void *userdata);
+
typedef void * nvmlDevice_t;
+#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 16
+
/* our own version of the PCI info struct */
-typedef struct {
- char bus_id_str[16]; /* string form of bus info */
+typedef struct
+{
+ char bus_id_str[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE];
unsigned int domain;
unsigned int bus;
unsigned int device;
@@ -33,19 +38,22 @@ typedef struct {
unsigned int res3;
} nvmlPciInfo_t;
-enum nvmlEnableState_t {
+enum nvmlEnableState_t
+{
NVML_FEATURE_DISABLED = 0,
NVML_FEATURE_ENABLED = 1,
NVML_FEATURE_UNKNOWN = 2
};
-enum nvmlRestrictedAPI_t {
+enum nvmlRestrictedAPI_t
+{
NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0,
- NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1,
+ NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, // not for GTX cards
NVML_RESTRICTED_API_COUNT = 2
};
-enum nvmlReturn_t {
+enum nvmlReturn_t
+{
NVML_SUCCESS = 0,
NVML_ERROR_UNINITIALIZED = 1,
NVML_ERROR_INVALID_ARGUMENT = 2,
@@ -57,22 +65,45 @@ enum nvmlReturn_t {
NVML_ERROR_INSUFFICIENT_POWER = 8,
NVML_ERROR_DRIVER_NOT_LOADED = 9,
NVML_ERROR_TIMEOUT = 10,
+ NVML_ERROR_IRQ_ISSUE = 11,
+ NVML_ERROR_LIBRARY_NOT_FOUND = 12,
+ NVML_ERROR_FUNCTION_NOT_FOUND = 13,
+ NVML_ERROR_CORRUPTED_INFOROM = 14,
+ NVML_ERROR_GPU_IS_LOST = 15,
+ NVML_ERROR_RESET_REQUIRED = 16,
+ NVML_ERROR_OPERATING_SYSTEM = 17,
+ NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18,
+ NVML_ERROR_IN_USE = 19,
NVML_ERROR_UNKNOWN = 999
};
-enum nvmlClockType_t {
+enum nvmlClockType_t
+{
NVML_CLOCK_GRAPHICS = 0,
NVML_CLOCK_SM = 1,
- NVML_CLOCK_MEM = 2
+ NVML_CLOCK_MEM = 2,
+ NVML_CLOCK_VIDEO = 3,
+ NVML_CLOCK_COUNT
+};
+
+enum nvmlClockId_t
+{
+ NVML_CLOCK_ID_CURRENT = 0,
+ NVML_CLOCK_ID_APP_CLOCK_TARGET = 1,
+ NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2,
+ NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3,
+ NVML_CLOCK_ID_COUNT
};
-enum nvmlPcieUtilCounter_t {
+enum nvmlPcieUtilCounter_t
+{
NVML_PCIE_UTIL_TX_BYTES = 0,
NVML_PCIE_UTIL_RX_BYTES = 1,
NVML_PCIE_UTIL_COUNT
};
-enum nvmlValueType_t {
+enum nvmlValueType_t
+{
NVML_VALUE_TYPE_DOUBLE = 0,
NVML_VALUE_TYPE_UNSIGNED_INT = 1,
NVML_VALUE_TYPE_UNSIGNED_LONG = 2,
@@ -80,77 +111,104 @@ enum nvmlValueType_t {
NVML_VALUE_TYPE_COUNT
};
+typedef int nvmlGpuTopologyLevel_t;
+typedef int nvmlNvLinkCapability_t;
+typedef int nvmlNvLinkErrorCounter_t;
+typedef int nvmlNvLinkUtilizationControl_t;
+
#define NVML_DEVICE_SERIAL_BUFFER_SIZE 30
#define NVML_DEVICE_UUID_BUFFER_SIZE 80
#define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32
/*
- * Handle to hold the function pointers for the entry points we need,
- * and the shared library itself.
- */
-typedef struct {
+* Handle to hold the function pointers for the entry points we need,
+* and the shared library itself.
+*/
+typedef struct
+{
void *nvml_dll;
int nvml_gpucount;
int cuda_gpucount;
unsigned int *nvml_pci_domain_id;
unsigned int *nvml_pci_bus_id;
unsigned int *nvml_pci_device_id;
+ unsigned int *nvml_pci_vendor_id;
unsigned int *nvml_pci_subsys_id;
int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */
int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */
nvmlDevice_t *devs;
nvmlEnableState_t *app_clocks;
- nvmlReturn_t (*nvmlInit)(void);
- nvmlReturn_t (*nvmlDeviceGetCount)(int *);
- nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, nvmlDevice_t *);
- nvmlReturn_t (*nvmlDeviceGetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *);
- nvmlReturn_t (*nvmlDeviceSetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t);
- nvmlReturn_t (*nvmlDeviceGetDefaultApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
- nvmlReturn_t (*nvmlDeviceGetApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
- nvmlReturn_t (*nvmlDeviceSetApplicationsClocks)(nvmlDevice_t, unsigned int, unsigned int);
- nvmlReturn_t (*nvmlDeviceResetApplicationsClocks)(nvmlDevice_t);
- nvmlReturn_t (*nvmlDeviceGetSupportedGraphicsClocks)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *arr);
- nvmlReturn_t (*nvmlDeviceGetSupportedMemoryClocks)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz);
- nvmlReturn_t (*nvmlDeviceGetClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
- nvmlReturn_t (*nvmlDeviceGetMaxClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
- nvmlReturn_t (*nvmlDeviceGetPowerManagementDefaultLimit)(nvmlDevice_t, unsigned int *limit);
- nvmlReturn_t (*nvmlDeviceGetPowerManagementLimit)(nvmlDevice_t, unsigned int *limit);
- nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitConstraints)(nvmlDevice_t, unsigned int *min, unsigned int *max);
- nvmlReturn_t (*nvmlDeviceSetPowerManagementLimit)(nvmlDevice_t device, unsigned int limit);
- nvmlReturn_t (*nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t *);
- nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
- nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
- nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
- nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
- nvmlReturn_t (*nvmlDeviceGetName)(nvmlDevice_t, char *, int);
- nvmlReturn_t (*nvmlDeviceGetTemperature)(nvmlDevice_t, int, unsigned int *);
- nvmlReturn_t (*nvmlDeviceGetFanSpeed)(nvmlDevice_t, unsigned int *);
- nvmlReturn_t (*nvmlDeviceGetPerformanceState)(nvmlDevice_t, int *); /* enum */
- nvmlReturn_t (*nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int *);
- nvmlReturn_t (*nvmlDeviceGetSerial)(nvmlDevice_t, char *serial, unsigned int len);
- nvmlReturn_t (*nvmlDeviceGetUUID)(nvmlDevice_t, char *uuid, unsigned int len);
- nvmlReturn_t (*nvmlDeviceGetVbiosVersion)(nvmlDevice_t, char *version, unsigned int len);
- nvmlReturn_t (*nvmlSystemGetDriverVersion)(char *version, unsigned int len);
+ nvmlReturn_t(*nvmlInit)(void);
+ nvmlReturn_t(*nvmlDeviceGetCount)(int *);
+ nvmlReturn_t(*nvmlDeviceGetHandleByIndex)(int, nvmlDevice_t *);
+ nvmlReturn_t(*nvmlDeviceGetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *);
+ nvmlReturn_t(*nvmlDeviceSetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t);
+ nvmlReturn_t(*nvmlDeviceGetDefaultApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceGetApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceSetApplicationsClocks)(nvmlDevice_t, unsigned int, unsigned int);
+ nvmlReturn_t(*nvmlDeviceResetApplicationsClocks)(nvmlDevice_t);
+ nvmlReturn_t(*nvmlDeviceGetSupportedGraphicsClocks)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *arr);
+ nvmlReturn_t(*nvmlDeviceGetSupportedMemoryClocks)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz);
+ nvmlReturn_t(*nvmlDeviceGetClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceGetMaxClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceGetPowerManagementDefaultLimit)(nvmlDevice_t, unsigned int *limit);
+ nvmlReturn_t(*nvmlDeviceGetPowerManagementLimit)(nvmlDevice_t, unsigned int *limit);
+ nvmlReturn_t(*nvmlDeviceGetPowerManagementLimitConstraints)(nvmlDevice_t, unsigned int *min, unsigned int *max);
+ nvmlReturn_t(*nvmlDeviceSetPowerManagementLimit)(nvmlDevice_t device, unsigned int limit);
+ nvmlReturn_t(*nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t *);
+ nvmlReturn_t(*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
+ nvmlReturn_t(*nvmlDeviceGetCurrPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
+ nvmlReturn_t(*nvmlDeviceGetMaxPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
+ nvmlReturn_t(*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
+ nvmlReturn_t(*nvmlDeviceGetName)(nvmlDevice_t, char *, int);
+ nvmlReturn_t(*nvmlDeviceGetTemperature)(nvmlDevice_t, int, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceGetFanSpeed)(nvmlDevice_t, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceGetPerformanceState)(nvmlDevice_t, int *); /* enum */
+ nvmlReturn_t(*nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int *);
+ nvmlReturn_t(*nvmlDeviceGetSerial)(nvmlDevice_t, char *serial, unsigned int len);
+ nvmlReturn_t(*nvmlDeviceGetUUID)(nvmlDevice_t, char *uuid, unsigned int len);
+ nvmlReturn_t(*nvmlDeviceGetVbiosVersion)(nvmlDevice_t, char *version, unsigned int len);
+ nvmlReturn_t(*nvmlSystemGetDriverVersion)(char *version, unsigned int len);
char* (*nvmlErrorString)(nvmlReturn_t);
- nvmlReturn_t (*nvmlShutdown)(void);
+ nvmlReturn_t(*nvmlShutdown)(void);
// v331
- nvmlReturn_t (*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit);
+ nvmlReturn_t(*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit);
// v340
- //nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet);
- //nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t);
- //nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled);
- //nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled);
+#ifdef __linux__
+ nvmlReturn_t(*nvmlDeviceClearCpuAffinity)(nvmlDevice_t);
+ nvmlReturn_t(*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet);
+ nvmlReturn_t(*nvmlDeviceSetCpuAffinity)(nvmlDevice_t);
+#endif
// v346
- nvmlReturn_t (*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value);
-} nvml_handle;
+ nvmlReturn_t(*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value);
+ // v36x (API 8)
+ nvmlReturn_t(*nvmlDeviceGetClock)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz);
+#ifdef __linux__
+ nvmlReturn_t(*nvmlSystemGetTopologyGpuSet)(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray);
+ nvmlReturn_t(*nvmlDeviceGetTopologyNearestGpus)(nvmlDevice_t, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray);
+ nvmlReturn_t(*nvmlDeviceGetTopologyCommonAncestor)(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo);
+#endif
+ nvmlReturn_t(*nvmlDeviceGetNvLinkState)(nvmlDevice_t, unsigned int link, nvmlEnableState_t *isActive);
+ nvmlReturn_t(*nvmlDeviceGetNvLinkVersion)(nvmlDevice_t, unsigned int link, unsigned int *version);
+ nvmlReturn_t(*nvmlDeviceGetNvLinkCapability)(nvmlDevice_t, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult);
+ nvmlReturn_t(*nvmlDeviceGetNvLinkRemotePciInfo)(nvmlDevice_t, unsigned int link, nvmlPciInfo_t *pci);
+ nvmlReturn_t(*nvmlDeviceGetNvLinkErrorCounter)(nvmlDevice_t, unsigned int link, nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue);
+ nvmlReturn_t(*nvmlDeviceResetNvLinkErrorCounters)(nvmlDevice_t, unsigned int link);
+ nvmlReturn_t(*nvmlDeviceSetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset);
+ nvmlReturn_t(*nvmlDeviceGetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control);
+ nvmlReturn_t(*nvmlDeviceGetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter);
+ nvmlReturn_t(*nvmlDeviceFreezeNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlEnableState_t freeze);
+ nvmlReturn_t(*nvmlDeviceResetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter);
+} nvml_handle;
nvml_handle * nvml_create();
int nvml_destroy(nvml_handle *nvmlh);
-/*
- * Query the number of GPUs seen by NVML
- */
+// Debug informations
+void nvml_print_device_info(int dev_id);
+
+// Query the number of GPUs seen by NVML
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id);
@@ -165,20 +223,52 @@ unsigned int gpu_fanpercent(struct cgpu_info *gpu);
unsigned int gpu_fanrpm(struct cgpu_info *gpu);
float gpu_temp(struct cgpu_info *gpu);
unsigned int gpu_power(struct cgpu_info *gpu);
+unsigned int gpu_plimit(struct cgpu_info *gpu);
int gpu_pstate(struct cgpu_info *gpu);
int gpu_busid(struct cgpu_info *gpu);
-unsigned int gpu_power(struct cgpu_info *gpu);
-unsigned int gpu_plimit(struct cgpu_info *gpu);
-/* pid/vid, sn and bios rev */
+// pid/vid, sn and bios rev
int gpu_info(struct cgpu_info *gpu);
-int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
+int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
/* nvapi functions */
#ifdef WIN32
int nvapi_init();
+int nvapi_init_settings();
+
+// to debug nvapi..
+int nvapi_pstateinfo(unsigned int devNum);
+uint8_t nvapi_get_plimit(unsigned int devNum);
+
+// nvapi devNum from dev_id (cuda GPU #N)
+unsigned int nvapi_devnum(int dev_id);
+int nvapi_devid(unsigned int devNum);
+
+void nvapi_toggle_clocks(int thr_id, bool enable);
+
+// cuda Replacement for 6.5 compat
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total);
#endif
#endif /* USE_WRAPNVML */
+
+void gpu_led_on(int dev_id);
+void gpu_led_percent(int dev_id, int percent);
+void gpu_led_off(int dev_id);
+
+#define LED_MODE_OFF 0
+#define LED_MODE_SHARES 1
+#define LED_MODE_MINING 2
+
+/* ------ nvidia-settings stuff for linux -------------------- */
+
+int nvs_init();
+int nvs_set_clocks(int dev_id);
+void nvs_reset_clocks(int dev_id);
+
+// nvidia-settings (X) devNum from dev_id (cuda GPU #N)
+int8_t nvs_devnum(int dev_id);
+int nvs_devid(int8_t devNum);
+
+extern bool need_nvsettings;
\ No newline at end of file
diff --git a/nvsettings.cpp b/nvsettings.cpp
new file mode 100644
index 00000000..5ea32338
--- /dev/null
+++ b/nvsettings.cpp
@@ -0,0 +1,251 @@
+/**
+ * nvidia-settings command line interface for linux - tpruvot 2017
+ *
+ * Notes: need X setup and running, with an opened X session.
+ * init speed could be improved, running multiple threads
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h> // pid_t
+
+#include "miner.h"
+#include "nvml.h"
+#include "cuda_runtime.h"
+
+#ifdef __linux__
+
+#define NVS_PATH "/usr/bin/nvidia-settings"
+
+static int8_t nvs_dev_map[MAX_GPUS] = { 0 };
+static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 };
+static int32_t nvs_clocks_set[MAX_GPUS] = { 0 };
+
+extern int32_t device_mem_offsets[MAX_GPUS];
+
+#if 0 /* complicated exec way and not better in fine */
+int nvs_query_fork_int(int nvs_id, const char* field)
+{
+ pid_t pid;
+ int pipes[2] = { 0 };
+ if (pipe(pipes) < 0)
+ return -1;
+
+ if ((pid = fork()) == -1) {
+ close(pipes[0]);
+ close(pipes[1]);
+ return -1;
+ } else if (pid == 0) {
+ char gpu_field[128] = { 0 };
+ sprintf(gpu_field, "[gpu:%d]/%s", nvs_id, field);
+
+ dup2(pipes[1], STDOUT_FILENO);
+ close(pipes[0]);
+ //close(pipes[1]);
+
+ if (-1 == execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", NULL)) {
+ exit(-1);
+ }
+ } else {
+ int intval = -1;
+ FILE *p = fdopen(pipes[0], "r");
+ close(pipes[1]);
+ if (!p) {
+ applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]);
+ return -1;
+ }
+ int rc = fscanf(p, "%d", &intval); // BUS 0000:2a:00.0 is read 42
+ if (rc > 0) {
+ //applog(LOG_BLUE, "%s res=%d", field, intval);
+ }
+ fclose(p);
+ close(pipes[0]);
+ return intval;
+ }
+ return -1;
+}
+#endif
+
+int nvs_query_int(int nvs_id, const char* field, int showerr)
+{
+ FILE *fp;
+ char command[256] = { 0 };
+ sprintf(command, "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+ fp = popen(command, "r");
+ if (fp) {
+ int intval = -1;
+ if (!showerr) {
+ int b = fscanf(fp, "%d", &intval);
+ if (!b) {
+ pclose(fp);
+ return -1;
+ }
+ } else {
+ char msg[512] = { 0 };
+ char buf[64] = { 0 };
+ ssize_t bytes, len=0, maxlen=sizeof(msg)-1;
+ while ((bytes=fscanf(fp, "%63s", buf)) > 0) {
+ len += snprintf(&msg[len], maxlen-len, "%s ", buf);
+ if (len >= maxlen) break;
+ }
+ if (strstr(msg, "ERROR")) {
+ char *xtra = strstr(msg, "; please run");
+ if (xtra) *xtra = '\0'; // strip noise
+ applog(LOG_INFO, "%s", msg);
+ intval = -1;
+ } else {
+ sscanf(msg, "%d", &intval);
+ }
+ }
+ pclose(fp);
+ return intval;
+ }
+ return -1;
+}
+
+int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen)
+{
+ FILE *fp;
+ char command[256] = { 0 };
+ *output = '\0';
+ sprintf(command, "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+ fp = popen(command, "r");
+ if (fp) {
+ char buf[256] = { 0 };
+ ssize_t len=0;
+ ssize_t bytes=0;
+ while ((bytes=fscanf(fp, "%255s", buf)) > 0) {
+ //applog(LOG_BLUE, "%d %s %d", nvs_id, buf, (int) bytes);
+ len += snprintf(&output[len], maxlen-len, "%s ", buf);
+ if (len >= maxlen) break;
+ }
+ pclose(fp);
+ if (strstr(output, "ERROR")) {
+ char *xtra = strstr(output, "; please run");
+ if (xtra) *xtra = '\0'; // strip noise
+ applog(LOG_INFO, "%s", output);
+ *output='\0';
+ }
+ return (int) len;
+ }
+ return -1;
+}
+
+int nvs_set_int(int nvs_id, const char* field, int value)
+{
+ FILE *fp;
+ char command[256] = { 0 };
+ int res = -1;
+ snprintf(command, 256, "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value);
+ fp = popen(command, "r");
+ if (fp) {
+ char msg[512] = { 0 };
+ char buf[64] = { 0 };
+ ssize_t bytes, len=0, maxlen=sizeof(msg)-1;
+ while ((bytes=fscanf(fp, "%63s", buf)) > 0) {
+ len += snprintf(&msg[len], maxlen-len, "%s ", buf);
+ if (len >= maxlen) break;
+ }
+ if (strstr(msg, "ERROR")) {
+ char *xtra = strstr(msg, "; please run");
+ if (xtra) *xtra = '\0'; // strip noise
+ applog(LOG_INFO, "%s", msg);
+ } else
+ res = 0;
+ pclose(fp);
+ }
+ return res;
+}
+
+int8_t nvs_devnum(int dev_id)
+{
+ return nvs_dev_map[dev_id];
+}
+
+int nvs_devid(int8_t nvs_id)
+{
+ for (int i=0; i < opt_n_threads; i++) {
+ int dev_id = device_map[i % MAX_GPUS];
+ if (nvs_dev_map[dev_id] == nvs_id)
+ return dev_id;
+ }
+ return 0;
+}
+
+int nvs_init()
+{
+ struct stat info;
+ struct timeval tv_start, tv_end, diff;
+ int x_devices = 0;
+ int n_threads = opt_n_threads;
+ if (stat(NVS_PATH, &info))
+ return -ENOENT;
+
+ gettimeofday(&tv_start, NULL);
+
+ for (int d = 0; d < MAX_GPUS; d++) {
+ // this part can be "slow" (100-200ms per device)
+ int res = nvs_query_int(d, "PCIBus", 1);
+ if (res < 0) break;
+ nvs_bus_ids[d] = 0xFFu & res;
+ x_devices++;
+ }
+
+ if (opt_debug) {
+ gettimeofday(&tv_end, NULL);
+ timeval_subtract(&diff, &tv_end, &tv_start);
+ applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms",
+ (1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
+ }
+
+ if (!x_devices)
+ return -ENODEV;
+ if (!n_threads) n_threads = cuda_num_devices();
+ for (int i = 0; i < n_threads; i++) {
+ int dev_id = device_map[i % MAX_GPUS];
+ cudaDeviceProp props;
+ if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
+ for (int8_t d = 0; d < x_devices; d++) {
+ if (nvs_bus_ids[d] == (uint8_t) props.pciBusID) {
+ gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u",
+ (int) d, (uint) nvs_bus_ids[d]);
+ nvs_dev_map[dev_id] = d;
+ /* char buf[1024] = { 0 };
+ nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1);
+ gpulog(LOG_DEBUG, d, "%s", buf); */
+ break;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int nvs_set_clocks(int dev_id)
+{
+ int res;
+ int8_t d = nvs_devnum(dev_id);
+ if (d < 0) return -ENODEV;
+ if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0;
+ res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", device_mem_offsets[dev_id]*2);
+ if (res == 0) nvs_clocks_set[d] = device_mem_offsets[dev_id]*2;
+ return res;
+}
+
+void nvs_reset_clocks(int dev_id)
+{
+ int8_t d = nvs_devnum(dev_id);
+ if (d < 0 || !nvs_clocks_set[d]) return;
+ nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0);
+ nvs_clocks_set[d] = 0;
+}
+
+#else
+int nvs_init() { return -ENOSYS; }
+int nvs_set_clocks(int dev_id) { return -ENOSYS; }
+void nvs_reset_clocks(int dev_id) { }
+#endif
diff --git a/util.cpp b/util.cpp
index 37caf690..5b0d1a1a 100644
--- a/util.cpp
+++ b/util.cpp
@@ -173,6 +173,40 @@ void applog(int prio, const char *fmt, ...)
va_end(ap);
}
+extern int gpu_threads;
+// Use different prefix if multiple cpu threads per gpu
+// Also, auto hide LOG_DEBUG if --debug (-D) is not used
+void gpulog(int prio, int thr_id, const char *fmt, ...)
+{
+ char _ALIGN(128) pfmt[128];
+ char _ALIGN(128) line[256];
+ int len, dev_id = device_map[thr_id % MAX_GPUS];
+ va_list ap;
+
+ if(prio == LOG_DEBUG && !opt_debug)
+ return;
+
+ if(gpu_threads > 1)
+ len = snprintf(pfmt, 128, "GPU T%d: %s", thr_id, fmt);
+ else
+ len = snprintf(pfmt, 128, "GPU #%d: %s", dev_id, fmt);
+ pfmt[sizeof(pfmt) - 1] = '\0';
+
+ va_start(ap, fmt);
+
+ if(len && vsnprintf(line, sizeof(line), pfmt, ap))
+ {
+ line[sizeof(line) - 1] = '\0';
+ applog(prio, "%s", line);
+ }
+ else
+ {
+ fprintf(stderr, "%s OOM!\n", __func__);
+ }
+
+ va_end(ap);
+}
+
void format_hashrate(double hashrate, char *output)
{
char prefix = '\0';