From d3eab583be00222c3ecbafa565d534f70a61a669 Mon Sep 17 00:00:00 2001 From: KlausT Date: Fri, 12 Jan 2018 04:11:23 +0100 Subject: [PATCH 1/2] trying to fix --mem-clock and similar options --- ccminer.cpp | 38 +- ccminer.vcxproj | 1 + ccminer.vcxproj.filters | 3 + cuda.cpp | 8 + miner.h | 12 +- nvml.cpp | 2505 ++++++++++++++++++++++++++++----------- nvml.h | 238 ++-- nvsettings.cpp | 251 ++++ util.cpp | 34 + 9 files changed, 2290 insertions(+), 800 deletions(-) create mode 100644 nvsettings.cpp diff --git a/ccminer.cpp b/ccminer.cpp index da87dc43..39111d28 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -71,6 +71,7 @@ void cuda_devicereset(); int cuda_finddevice(char *name); void cuda_print_devices(); void cuda_get_device_sm(); +void cuda_reset_device(int thr_id, bool *init); #include "nvml.h" #ifdef USE_WRAPNVML @@ -90,6 +91,11 @@ struct workio_cmd { } u; }; +bool opt_debug_diff = false; +bool opt_debug_threads = false; +bool opt_showdiff = true; +bool opt_hwmonitor = true; + static const char *algo_names[] = { "bitcoin", "blake", @@ -149,6 +155,7 @@ static json_t *opt_config = nullptr; static const bool opt_time = true; enum sha_algos opt_algo; int opt_n_threads = 0; +int gpu_threads = 1; int opt_affinity = -1; int opt_priority = 0; static double opt_difficulty = 1; // CH @@ -156,14 +163,20 @@ static bool opt_extranonce = true; bool opt_trust_pool = false; int num_cpus; int active_gpus; +bool need_nvsettings = false; +bool need_memclockrst = false; char * device_name[MAX_GPUS] = { nullptr }; int device_map[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = {0}; +int32_t device_mem_offsets[MAX_GPUS] = {0}; uint32_t device_gpu_clocks[MAX_GPUS] = {0}; uint32_t device_mem_clocks[MAX_GPUS] = {0}; uint32_t device_plimit[MAX_GPUS] = {0}; int8_t device_pstate[MAX_GPUS]; +int32_t device_led[MAX_GPUS] = {-1, -1}; +int opt_led_mode = 0; +uint8_t device_tlimit[MAX_GPUS] = {0}; char *rpc_user = NULL; static char *rpc_url = nullptr; static 
char *rpc_userpass = nullptr; @@ -179,16 +192,17 @@ int longpoll_thr_id = -1; int stratum_thr_id = -1; int api_thr_id = -1; bool stratum_need_reset = false; +volatile bool abort_flag = false; struct work_restart *work_restart = NULL; struct stratum_ctx stratum = { 0 }; bool stop_mining = false; volatile bool mining_has_stopped[MAX_GPUS]; pthread_mutex_t applog_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER; uint32_t accepted_count = 0L; uint32_t rejected_count = 0L; -static double thr_hashrates[MAX_GPUS]; +double thr_hashrates[MAX_GPUS]; uint64_t global_hashrate = 0; double global_diff = 0.0; uint64_t net_hashrate = 0; @@ -292,7 +306,7 @@ Options:\n\ --mem-clock=N Set the gpu memory max clock (346.72+ driver)\n\ --gpu-clock=N Set the gpu engine max clock (346.72+ driver)\n\ --pstate=N Set the gpu power state (352.21+ driver)\n\ - --plimit=N Set the gpu power limit(352.21 + driver)\n" + --plimit=N Set the gpu power limit(352.21 + driver)\n" #endif ""; @@ -2805,11 +2819,23 @@ int main(int argc, char *argv[]) if(!hnvml && nvapi_init() == 0) { applog(LOG_INFO, "NVAPI GPU monitoring enabled."); - cuda_devicenames(); // refresh gpu vendor name + if(!hnvml) + { + cuda_devicenames(); // refresh gpu vendor name + } + nvapi_init_settings(); } #endif else if(!hnvml) applog(LOG_INFO, "GPU monitoring is not available."); + // force reinit to set default device flags + if(!hnvml) + { + for(int n = 0; n < active_gpus; n++) + { + cuda_reset_device(n, NULL); + } + } #endif if(opt_protocol) @@ -3022,11 +3048,11 @@ int main(int argc, char *argv[]) gpu_reinit = true; if(nvml_set_plimit(hnvml, device_map[n]) == 1) gpu_reinit = true; - if(nvml_set_clocks(hnvml, device_map[n]) == 1) + if(!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1) gpu_reinit = true; if(gpu_reinit) { -// cuda_reset_device(n, NULL); + cuda_reset_device(n, NULL); } } } diff --git a/ccminer.vcxproj 
b/ccminer.vcxproj index 286893af..f7a30123 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -284,6 +284,7 @@ + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 55ee3a64..caa2c361 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -207,6 +207,9 @@ Source Files + + Source Files + diff --git a/cuda.cpp b/cuda.cpp index 4d26f83d..b65ca872 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -281,3 +281,11 @@ double throughput2intensity(uint32_t throughput) } return intensity; } + +void cuda_reset_device(int thr_id, bool *init) +{ + int dev_id = device_map[thr_id]; + cudaSetDevice(dev_id); + cudaDeviceReset(); + cudaDeviceSynchronize(); +} diff --git a/miner.h b/miner.h index 66575b31..2e5dbce6 100644 --- a/miner.h +++ b/miner.h @@ -75,9 +75,11 @@ void *alloca (size_t); #ifdef HAVE_SYSLOG_H #include -#define LOG_BLUE 0x10 /* unique value */ +#define LOG_BLUE 0x10 +#define LOG_RAW 0x99 #else -enum { +enum +{ LOG_ERR, LOG_WARNING, LOG_NOTICE, @@ -85,6 +87,7 @@ enum { LOG_DEBUG, /* custom notices */ LOG_BLUE = 0x10, + LOG_RAW = 0x99 }; #endif @@ -481,6 +484,7 @@ struct thr_info { extern int cuda_num_devices(); extern int cuda_version(); extern int cuda_gpu_clocks(struct cgpu_info *gpu); +int cuda_gpu_info(struct cgpu_info *gpu); extern bool opt_verify; extern bool opt_benchmark; extern bool opt_debug; @@ -507,7 +511,7 @@ extern int longpoll_thr_id; extern int stratum_thr_id; extern int api_thr_id; extern bool opt_trust_pool; - +extern volatile bool abort_flag; extern uint64_t global_hashrate; extern double global_diff; @@ -515,8 +519,10 @@ extern double global_diff; extern char* device_name[MAX_GPUS]; extern int device_map[MAX_GPUS]; extern long device_sm[MAX_GPUS]; +extern uint32_t device_plimit[MAX_GPUS]; extern uint32_t gpus_intensity[MAX_GPUS]; double throughput2intensity(uint32_t throughput); +extern void gpulog(int prio, int thr_id, const char *fmt, ...); #define CL_N "\x1B[0m" #define CL_RED "\x1B[31m" diff --git a/nvml.cpp b/nvml.cpp 
index bce34e99..2bf19d8e 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -1,19 +1,19 @@ /* - * A trivial little dlopen()-based wrapper library for the - * NVIDIA NVML library, to allow runtime discovery of NVML on an - * arbitrary system. This is all very hackish and simple-minded, but - * it serves my immediate needs in the short term until NVIDIA provides - * a static NVML wrapper library themselves, hopefully in - * CUDA 6.5 or maybe sometime shortly after. - * - * This trivial code is made available under the "new" 3-clause BSD license, - * and/or any of the GPL licenses you prefer. - * Feel free to use the code and modify as you see fit. - * - * John E. Stone - john.stone@gmail.com - * Tanguy Pruvot - tpruvot@github - * - */ +* A trivial little dlopen()-based wrapper library for the +* NVIDIA NVML library, to allow runtime discovery of NVML on an +* arbitrary system. This is all very hackish and simple-minded, but +* it serves my immediate needs in the short term until NVIDIA provides +* a static NVML wrapper library themselves, hopefully in +* CUDA 6.5 or maybe sometime shortly after. +* +* This trivial code is made available under the "new" 3-clause BSD license, +* and/or any of the GPL licenses you prefer. +* Feel free to use the code and modify as you see fit. +* +* John E. 
Stone - john.stone@gmail.com +* Tanguy Pruvot - tpruvot@github +* +*/ #include #include @@ -30,71 +30,79 @@ extern nvml_handle *hnvml; extern char driver_version[32]; -static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; +static uint32_t device_bus_ids[MAX_GPUS] = {0}; extern uint32_t device_gpu_clocks[MAX_GPUS]; extern uint32_t device_mem_clocks[MAX_GPUS]; -extern uint32_t device_plimit[MAX_GPUS]; +extern int32_t device_mem_offsets[MAX_GPUS]; +extern uint8_t device_tlimit[MAX_GPUS]; extern int8_t device_pstate[MAX_GPUS]; +extern int32_t device_led[MAX_GPUS]; +int32_t device_led_state[MAX_GPUS] = {0}; +static THREAD bool has_rgb_ok = false; -uint32_t clock_prev[MAX_GPUS] = { 0 }; -uint32_t clock_prev_mem[MAX_GPUS] = { 0 }; -uint32_t limit_prev[MAX_GPUS] = { 0 }; -static bool nvml_plimit_set = false; +uint32_t clock_prev[MAX_GPUS] = {0}; +uint32_t clock_prev_mem[MAX_GPUS] = {0}; +uint32_t limit_prev[MAX_GPUS] = {0}; -#ifdef WIN32 -#include "nvapi/nvapi_ccminer.h" -static int nvapi_dev_map[MAX_GPUS] = {0}; -static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = {0}; -#endif +static bool nvml_plimit_set = false; +extern bool need_memclockrst; /* - * Wrappers to emulate dlopen() on other systems like Windows - */ +* Wrappers to emulate dlopen() on other systems like Windows +*/ #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) - #include - static void *wrap_dlopen(const char *filename) { - HMODULE h = LoadLibrary(filename); - if (!h && opt_debug) { - applog(LOG_DEBUG, "dlopen(%d): failed to load %s", - GetLastError(), filename); - } - return (void*)h; - } - static void *wrap_dlsym(void *h, const char *sym) { - return (void *)GetProcAddress((HINSTANCE)h, sym); - } - static int wrap_dlclose(void *h) { - /* FreeLibrary returns nonzero on success */ - return (!FreeLibrary((HINSTANCE)h)); +#include +static void *wrap_dlopen(const char *filename) +{ + HMODULE h = LoadLibrary(filename); + if(!h && opt_debug) + { + applog(LOG_DEBUG, "dlopen(%d): failed to load %s", 
+ GetLastError(), filename); } + return (void*)h; +} +static void *wrap_dlsym(void *h, const char *sym) +{ + return (void *)GetProcAddress((HINSTANCE)h, sym); +} +static int wrap_dlclose(void *h) +{ + /* FreeLibrary returns nonzero on success */ + return (!FreeLibrary((HINSTANCE)h)); +} #else - /* assume we can use dlopen itself... */ - #include - #include - static void *wrap_dlopen(const char *filename) { - void *h = dlopen(filename, RTLD_NOW); - if (h == NULL && opt_debug) { - applog(LOG_DEBUG, "dlopen(%d): failed to load %s", - errno, filename); - } - return (void*)h; +/* assume we can use dlopen itself... */ +#include +#include +static void *wrap_dlopen(const char *filename) +{ + void *h = dlopen(filename, RTLD_NOW); + if(h == NULL && opt_debug) + { + applog(LOG_DEBUG, "dlopen(%d): failed to load %s", + errno, filename); } + return (void*)h; +} - static void *wrap_dlsym(void *h, const char *sym) { - return dlsym(h, sym); - } - static int wrap_dlclose(void *h) { - return dlclose(h); - } +static void *wrap_dlsym(void *h, const char *sym) +{ + return dlsym(h, sym); +} +static int wrap_dlclose(void *h) +{ + return dlclose(h); +} #endif nvml_handle * nvml_create() { - int i=0; + int i = 0; nvml_handle *nvmlh = NULL; -#if defined(WIN32) +#ifdef WIN32 /* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */ #define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll" #else @@ -110,202 +118,203 @@ nvml_handle * nvml_create() #endif void *nvml_dll = wrap_dlopen(tmp); - if (nvml_dll == NULL) { + if(nvml_dll == NULL) + { #ifdef WIN32 nvml_dll = wrap_dlopen("nvml.dll"); - if (nvml_dll == NULL) + if(nvml_dll == NULL) #endif - return NULL; + return NULL; } - nvmlh = (nvml_handle *) calloc(1, sizeof(nvml_handle)); + nvmlh = (nvml_handle *)calloc(1, sizeof(nvml_handle)); nvmlh->nvml_dll = nvml_dll; - nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); - if (!nvmlh->nvmlInit) - nvmlh->nvmlInit = 
(nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); - nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); - if (!nvmlh->nvmlDeviceGetCount) - nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount"); - nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *)) + nvmlh->nvmlInit = (nvmlReturn_t(*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); + if(!nvmlh->nvmlInit) + nvmlh->nvmlInit = (nvmlReturn_t(*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); + nvmlh->nvmlDeviceGetCount = (nvmlReturn_t(*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); + if(!nvmlh->nvmlDeviceGetCount) + nvmlh->nvmlDeviceGetCount = (nvmlReturn_t(*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount"); + nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t(*)(int, nvmlDevice_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); - nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *)) + nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t(*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAPIRestriction"); - nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t)) + nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t(*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAPIRestriction"); - nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) + nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetDefaultApplicationsClock"); - nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks)) + 
nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetApplicationsClock"); - nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int mem, unsigned int gpu)) + nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int mem, unsigned int gpu)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetApplicationsClocks"); - nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t)) + nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t(*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceResetApplicationsClocks"); - nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t (*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *)) + nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t(*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedGraphicsClocks"); - nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz)) + nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedMemoryClocks"); - nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) + nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); - nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) + nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxClockInfo"); - nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2"); - if 
(!nvmlh->nvmlDeviceGetPciInfo) - nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); - nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) + nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2"); + if(!nvmlh->nvmlDeviceGetPciInfo) + nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); + nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *gen)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkGeneration"); - nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) + nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *width)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkWidth"); - nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) + nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *gen)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkGeneration"); - nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) + nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *width)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkWidth"); - nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) + nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); - nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) + nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, 
"nvmlDeviceGetPowerManagementDefaultLimit"); - nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) + nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimit"); - nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *min, unsigned int *max)) + nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *min, unsigned int *max)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimitConstraints"); - nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int limit)) + nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetPowerManagementLimit"); - nvmlh->nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, int)) + nvmlh->nvmlDeviceGetName = (nvmlReturn_t(*)(nvmlDevice_t, char *, int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); - nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t (*)(nvmlDevice_t, int, unsigned int *)) + nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t(*)(nvmlDevice_t, int, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); - nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) + nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed"); - nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t (*)(nvmlDevice_t, int *)) + nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t(*)(nvmlDevice_t, int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* or nvmlDeviceGetPowerState */ - nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) + nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t(*)(nvmlDevice_t, char *, unsigned int)) 
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSerial"); - nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) + nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t(*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetUUID"); - nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) + nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t(*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetVbiosVersion"); - nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t (*)(char *, unsigned int)) + nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t(*)(char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlSystemGetDriverVersion"); nvmlh->nvmlErrorString = (char* (*)(nvmlReturn_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); - nvmlh->nvmlShutdown = (nvmlReturn_t (*)()) + nvmlh->nvmlShutdown = (nvmlReturn_t(*)()) wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); // v331 - nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) + nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); // v340 - /* NVML_ERROR_NOT_SUPPORTED - nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled)) - wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled"); - nvmlh->nvmlDeviceSetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t enabled)) - wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAutoBoostedClocksEnabled"); */ +#ifdef __linux__ + nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t(*)(nvmlDevice_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity"); + nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity"); + 
nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t(*)(nvmlDevice_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity"); +#endif // v346 - nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) + nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t(*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput"); - - if (nvmlh->nvmlInit == NULL || - nvmlh->nvmlShutdown == NULL || - nvmlh->nvmlErrorString == NULL || - nvmlh->nvmlDeviceGetCount == NULL || - nvmlh->nvmlDeviceGetHandleByIndex == NULL || - nvmlh->nvmlDeviceGetPciInfo == NULL || - nvmlh->nvmlDeviceGetName == NULL) + // v36x (API 8 / Pascal) + nvmlh->nvmlDeviceGetClock = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock"); + + if(nvmlh->nvmlInit == NULL || + nvmlh->nvmlShutdown == NULL || + nvmlh->nvmlErrorString == NULL || + nvmlh->nvmlDeviceGetCount == NULL || + nvmlh->nvmlDeviceGetHandleByIndex == NULL || + nvmlh->nvmlDeviceGetPciInfo == NULL || + nvmlh->nvmlDeviceGetName == NULL) { - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "Failed to obtain required NVML function pointers"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } - nvmlReturn_t rc; - rc = nvmlh->nvmlInit(); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "nvmlInit() failed: %s", nvmlh->nvmlErrorString(rc)); - return NULL; - } - rc = nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version)); - if(rc != NVML_SUCCESS) - applog(LOG_WARNING, "nvmlSystemGetDriverVersion() failed: %s", nvmlh->nvmlErrorString(rc)); - rc = nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); - if(rc != NVML_SUCCESS) - applog(LOG_WARNING, "nvmlDeviceGetCount() failed: %s", nvmlh->nvmlErrorString(rc)); + nvmlh->nvmlInit(); + if(nvmlh->nvmlSystemGetDriverVersion) + nvmlh->nvmlSystemGetDriverVersion(driver_version, 
sizeof(driver_version)); + nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); /* Query CUDA device count, in case it doesn't agree with NVML, since */ /* CUDA will only report GPUs with compute capability greater than 1.0 */ - if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { - if (opt_debug) + if(cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) + { + if(opt_debug) applog(LOG_DEBUG, "Failed to query CUDA device count!"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } - nvmlh->devs = (nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t)); - nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); - nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); - nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); + nvmlh->devs = (nvmlDevice_t *)calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t)); + nvmlh->nvml_pci_domain_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_bus_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_device_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_vendor_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_subsys_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_cuda_device_id = (int*)calloc(nvmlh->nvml_gpucount, sizeof(int)); + nvmlh->cuda_nvml_device_id = (int*)calloc(nvmlh->cuda_gpucount, sizeof(int)); + nvmlh->app_clocks = 
(nvmlEnableState_t*)calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); /* Obtain GPU device handles we're going to need repeatedly... */ - for (i=0; invml_gpucount; i++) + for(i = 0; invml_gpucount; i++) { - rc = nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); - if(rc != NVML_SUCCESS) - applog(LOG_WARNING, "GPU %d: nvmlDeviceGetHandleByIndex() failed: %s", i, nvmlh->nvmlErrorString(rc)); + nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); } /* Query PCI info for each NVML device, and build table for mapping of */ /* CUDA device IDs to NVML device IDs and vice versa */ - for (i=0; invml_gpucount; i++) { + for(i = 0; invml_gpucount; i++) + { nvmlPciInfo_t pciinfo; nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; - nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; + nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; nvmlh->nvml_pci_device_id[i] = pciinfo.device; + nvmlh->nvml_pci_vendor_id[i] = pciinfo.pci_device_id; nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id; nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN; - if (nvmlh->nvmlDeviceSetAPIRestriction) + if(nvmlh->nvmlDeviceSetAPIRestriction) { - rc = nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, - NVML_FEATURE_ENABLED); - if(rc != NVML_SUCCESS && opt_debug) - applog(LOG_WARNING, "Device %d: nvmlDeviceSetAPIRestriction() failed: %s", nvmlh->devs[i], nvmlh->nvmlErrorString(rc)); + nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, + NVML_FEATURE_ENABLED); /* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */ } - if (nvmlh->nvmlDeviceGetAPIRestriction) + if(nvmlh->nvmlDeviceGetAPIRestriction) { - rc = nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, - &nvmlh->app_clocks[i]); - if(rc != NVML_SUCCESS) - applog(LOG_WARNING, "Device %d: nvmlDeviceGetAPIRestriction() failed: %s", nvmlh->devs[i], 
nvmlh->nvmlErrorString(rc)); + nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, + &nvmlh->app_clocks[i]); } } /* build mapping of NVML device IDs to CUDA IDs */ - for (i=0; invml_gpucount; i++) { + for(i = 0; invml_gpucount; i++) + { nvmlh->nvml_cuda_device_id[i] = -1; } - for (i=0; icuda_gpucount; i++) { + for(i = 0; icuda_gpucount; i++) + { cudaDeviceProp props; nvmlh->cuda_nvml_device_id[i] = -1; - if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { + if(cudaGetDeviceProperties(&props, i) == cudaSuccess) + { device_bus_ids[i] = props.pciBusID; - for (int j = 0; j < nvmlh->nvml_gpucount; j++) { - if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && - (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && - (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { - if (opt_debug) + for(int j = 0; j < nvmlh->nvml_gpucount; j++) + { + if((nvmlh->nvml_pci_domain_id[j] == (uint32_t)props.pciDomainID) && + (nvmlh->nvml_pci_bus_id[j] == (uint32_t)props.pciBusID) && + (nvmlh->nvml_pci_device_id[j] == (uint32_t)props.pciDeviceID)) + { + if(opt_debug) applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u", - i, j, (uint32_t) props.pciBusID); + i, j, (uint32_t)props.pciBusID); nvmlh->nvml_cuda_device_id[j] = i; nvmlh->cuda_nvml_device_id[i] = j; } @@ -316,120 +325,91 @@ nvml_handle * nvml_create() return nvmlh; } -#ifdef WIN32 -// Replacement for WIN32 CUDA 6.5 on pascal -int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) -{ - NvAPI_Status ret = NVAPI_OK; - NV_DISPLAY_DRIVER_MEMORY_INFO mem = {0}; - mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; - unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; - if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) - { - *total = (uint64_t)mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; - *free = (uint64_t)mem.curAvailableDedicatedVideoMemory; - } - return (int)ret; -} -#endif - -#define MAXCLOCKS 
255 /* apply config clocks to an used device */ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) { nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; - if (n < 0 || n >= nvmlh->nvml_gpucount) + //if (need_nvsettings) /* prefer later than init time */ + // nvs_set_clocks(dev_id); + if(n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if (!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id]) + if(!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id]) return 0; // nothing to do - if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { + if(nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) + { applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", dev_id); return -EPERM; } uint32_t mem_prev = clock_prev_mem[dev_id]; if(!mem_prev) - { - rc = nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: unable to query memory clock", dev_id); - return -1; - } - } + nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev); uint32_t gpu_prev = clock_prev[dev_id]; if(!gpu_prev) - { - rc = nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: unable to query graphics clock", dev_id); - return -1; - } - } + nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev); - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); + nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); + rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); if(rc != NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: unable to query default memory clock", dev_id); - return -1; - } - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); - if (rc != 
NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: unable to query default graphics clock", dev_id); - return -1; + applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); + return -EINVAL; } - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "GPU #%d: default application clocks are %u/%u", dev_id, mem_clk, gpu_clk); // get application config values - if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; - if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; - - // these functions works for the 960 and the 970 (346.72+), not for the 750 Ti - uint32_t nclocks = MAXCLOCKS; - uint32_t clocks[MAXCLOCKS] = {0}; - - rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks); - if(rc != NVML_SUCCESS) + if(device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; + if(device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; + + // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ + uint32_t nclocks = 0, mem_clocks[32] = {0}; + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); + nclocks = min(nclocks, 32); + if(nclocks) + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); + for(uint8_t u = 0; u < nclocks; u++) { - applog(LOG_WARNING, "GPU #%d: unable to query supported memory clocks", dev_id); - return -1; - } - for (uint8_t u=0; u < nclocks; u++) { // ordered by pstate (so highest is first memory clock - P0) - if(clocks[u] <= mem_clk) + if(mem_clocks[u] <= mem_clk) { - mem_clk = clocks[u]; + mem_clk = mem_clocks[u]; break; } } - nclocks = MAXCLOCKS; - rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks); - if(rc != NVML_SUCCESS) + uint32_t* gpu_clocks = NULL; + nclocks = 0; + nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); + if(nclocks) { - applog(LOG_WARNING, "GPU #%d: unable to query supported graphics clocks", 
dev_id); - return -1; - } - for (uint8_t u=0; u < nclocks; u++) { - // ordered desc, so get first - if (clocks[u] <= gpu_clk) { - gpu_clk = clocks[u]; - break; + if(opt_debug) + applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk); + gpu_clocks = (uint32_t*)calloc(1, sizeof(uint32_t) * nclocks + 4); + nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); + for(uint8_t u = 0; u < nclocks; u++) + { + // ordered desc, so get first + if(gpu_clocks[u] <= gpu_clk) + { + gpu_clk = gpu_clocks[u]; + break; + } } + free(gpu_clocks); } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); - if (rc == NVML_SUCCESS) + if(rc == NVML_SUCCESS) applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk); - else { - applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); + else + { + applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } @@ -446,123 +426,238 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; - if (n < 0 || n >= nvmlh->nvml_gpucount) + if(need_nvsettings) + nvs_reset_clocks(dev_id); + if(n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if (clock_prev[dev_id]) { + if(clock_prev[dev_id]) + { rc = nvmlh->nvmlDeviceResetApplicationsClocks(nvmlh->devs[n]); - if (rc != NVML_SUCCESS) { + if(rc != NVML_SUCCESS) + { applog(LOG_WARNING, "GPU #%d: unable to reset application clocks", dev_id); } clock_prev[dev_id] = 0; ret = 1; } - if (limit_prev[dev_id]) { + if(limit_prev[dev_id]) + { uint32_t plimit = limit_prev[dev_id]; - if (nvmlh->nvmlDeviceGetPowerManagementDefaultLimit && !plimit) { + if(nvmlh->nvmlDeviceGetPowerManagementDefaultLimit && !plimit) + { rc = nvmlh->nvmlDeviceGetPowerManagementDefaultLimit(nvmlh->devs[n], &plimit); - } else if 
(plimit) { + } + else if(plimit) + { rc = NVML_SUCCESS; } - if (rc == NVML_SUCCESS) + if(rc == NVML_SUCCESS) nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); ret = 1; } return ret; } - /** - * Set power state of a device (9xx) - * Code is similar as clocks one, which allow the change of the pstate - */ +* Set power state of a device (9xx) +* Code is similar as clocks one, which allow the change of the pstate +*/ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) { nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; - if (n < 0 || n >= nvmlh->nvml_gpucount) + if(n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if (device_pstate[dev_id] < 0) + if(device_pstate[dev_id] < 0) return 0; - if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { + if(nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) + { applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id); return -EPERM; } - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); + nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); + rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); if(rc != NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetDefaultApplicationsClock: %s", dev_id, nvmlh->nvmlErrorString(rc)); - return -1; - } - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); - if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); return -EINVAL; } // get application config values - if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; - if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; + if(device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; + if(device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; - // these functions works for the 960 and the 970 (346.72+), 
not for the 750 Ti - uint32_t clocks[MAXCLOCKS] = {0}; - uint32_t nclocks = MAXCLOCKS; + // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ + uint32_t nclocks = 0, mem_clocks[32] = {0}; int8_t wanted_pstate = device_pstate[dev_id]; - rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks); - if(rc != NVML_SUCCESS) + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); + nclocks = min(nclocks, 32); + if(nclocks) + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); + if((uint32_t)wanted_pstate + 1 > nclocks) { - applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetSupportedMemoryClocks: %s", dev_id, nvmlh->nvmlErrorString(rc)); - return -1; + applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks); } - if(wanted_pstate < 0) - return -1; - if(wanted_pstate < nclocks) + for(uint8_t u = 0; u < nclocks; u++) { - mem_clk = clocks[wanted_pstate]; + // ordered by pstate (so highest P0 first) + if(u == wanted_pstate) + { + mem_clk = mem_clocks[u]; + break; + } } - else + + uint32_t* gpu_clocks = NULL; + nclocks = 0; + nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); + if(nclocks) { - applog(LOG_WARNING, "GPU #%d: pstate %d is unsupported"); - return -1; + gpu_clocks = (uint32_t*)calloc(1, sizeof(uint32_t) * nclocks + 4); + rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); + if(rc == NVML_SUCCESS) + { + // ordered desc, get the max app clock (do not limit) + gpu_clk = gpu_clocks[0]; + } + free(gpu_clocks); } - nclocks = MAXCLOCKS; - rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks); + rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); if(rc != NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetSupportedGraphicsClocks: %s", dev_id, nvmlh->nvmlErrorString(rc)); + 
applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int)wanted_pstate, + mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } - if(device_gpu_clocks[dev_id] == 0) - gpu_clk = 9999; - for(uint8_t u = 0; u < nclocks; u++) + + if(!opt_quiet) + applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int)wanted_pstate, mem_clk, gpu_clk); + + clock_prev[dev_id] = 1; + return 1; +} + +int nvml_set_plimit(nvml_handle *nvmlh, int dev_id) +{ + nvmlReturn_t rc = NVML_ERROR_UNKNOWN; + uint32_t gpu_clk = 0, mem_clk = 0; + int n = nvmlh->cuda_nvml_device_id[dev_id]; + if(n < 0 || n >= nvmlh->nvml_gpucount) + return -ENODEV; + + if(!device_plimit[dev_id]) + return 0; // nothing to do + + if(!nvmlh->nvmlDeviceSetPowerManagementLimit) + return -ENOSYS; + + uint32_t plimit = device_plimit[dev_id] * 1000; + uint32_t pmin = 1000, pmax = 0, prev_limit = 0; + if(nvmlh->nvmlDeviceGetPowerManagementLimitConstraints) + rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax); + + if(rc != NVML_SUCCESS) { - // ordered desc, so get first - if(clocks[u] <= gpu_clk) - { - gpu_clk = clocks[u]; - break; - } + if(!nvmlh->nvmlDeviceGetPowerManagementLimit) + return -ENOSYS; } + nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit); + if(!pmax) pmax = prev_limit; - rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); - if (rc != NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc)); + plimit = min(plimit, pmax); + plimit = max(plimit, pmin); + rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); + if(rc != NVML_SUCCESS) + { +#ifndef WIN32 + applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc)); +#endif return -1; } + else + { + device_plimit[dev_id] = plimit / 1000; + nvml_plimit_set = true; + } - if (!opt_quiet) - applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int)wanted_pstate, mem_clk, 
gpu_clk); + if(!opt_quiet) + { + applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)", + dev_id, plimit / 1000U, pmin / 1000U, pmax / 1000U); + } - clock_prev[dev_id] = 1; + limit_prev[dev_id] = prev_limit; return 1; } +uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id) +{ + uint32_t plimit = 0; + int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1; + if(n < 0 || n >= nvmlh->nvml_gpucount) + return 0; + + if(nvmlh->nvmlDeviceGetPowerManagementLimit) + { + nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit); + } + return plimit; +} + +// ccminer -D -n +#define LSTDEV_PFX " " +void nvml_print_device_info(int dev_id) +{ + if(!hnvml) return; + + int n = hnvml->cuda_nvml_device_id[dev_id]; + if(n < 0 || n >= hnvml->nvml_gpucount) + return; + + nvmlReturn_t rc; + + // fprintf(stderr, "------ Hardware ------\n"); + int gvid = hnvml->nvml_pci_vendor_id[n] & 0xFFFF; + int gpid = hnvml->nvml_pci_vendor_id[n] >> 16; + int svid = hnvml->nvml_pci_subsys_id[n] & 0xFFFF; + int spid = hnvml->nvml_pci_subsys_id[n] >> 16; + + fprintf(stderr, LSTDEV_PFX "ID %04x:%04x/%04x:%04x BUS %04x:%02x:%02x.0\n", gvid, gpid, svid, spid, + (int)hnvml->nvml_pci_domain_id[n], (int)hnvml->nvml_pci_bus_id[n], (int)hnvml->nvml_pci_device_id[n]); + + if(hnvml->nvmlDeviceGetClock) + { + uint32_t gpu_clk = 0, mem_clk = 0; + + // fprintf(stderr, "------- Clocks -------\n"); + + hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk); + rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk); + if(rc == NVML_SUCCESS) + { + fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); + } + hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk); + rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk); + if(rc == NVML_SUCCESS) + { + fprintf(stderr, 
LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); + } + hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk); + rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk); + if(rc == NVML_SUCCESS) + { + fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); + } + } +} int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount) { @@ -576,16 +671,17 @@ int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount) return 0; } + int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (!nvmlh->nvmlDeviceGetName) + if(!nvmlh->nvmlDeviceGetName) return -ENOSYS; - if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != NVML_SUCCESS) + if(nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != NVML_SUCCESS) return -1; return 0; @@ -596,14 +692,15 @@ int nvml_get_tempC(nvml_handle *nvmlh, int cudaindex, unsigned int *tempC) { nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (!nvmlh->nvmlDeviceGetTemperature) + if(!nvmlh->nvmlDeviceGetTemperature) return -ENOSYS; rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC); - if (rc != NVML_SUCCESS) { + if(rc != NVML_SUCCESS) + { return -1; } @@ -615,32 +712,50 @@ int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) { nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (!nvmlh->nvmlDeviceGetFanSpeed) + 
if(!nvmlh->nvmlDeviceGetFanSpeed) return -ENOSYS; rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt); - if (rc != NVML_SUCCESS) { + if(rc != NVML_SUCCESS) + { return -1; } return 0; } + +int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigned int *mem_clock) +{ + nvmlReturn_t rc; + int gpuindex = hnvml->cuda_nvml_device_id[cudaindex]; + if(gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV; + if(!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS; + + rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock); + if(rc != NVML_SUCCESS) return -1; + rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock); + if(rc != NVML_SUCCESS) return -1; + + return 0; +} + /* Not Supported on 750Ti 340.23 */ int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (!nvmlh->nvmlDeviceGetPowerUsage) + if(!nvmlh->nvmlDeviceGetPowerUsage) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts); - if (res != NVML_SUCCESS) { + if(res != NVML_SUCCESS) + { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res)); return -1; @@ -653,14 +768,15 @@ int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliw int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (!nvmlh->nvmlDeviceGetPerformanceState) + if(!nvmlh->nvmlDeviceGetPerformanceState) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate); - if (res != 
NVML_SUCCESS) { + if(res != NVML_SUCCESS) + { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res)); return -1; @@ -672,7 +788,7 @@ int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate) int nvml_get_busid(nvml_handle *nvmlh, int cudaindex, int *busid) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; (*busid) = nvmlh->nvml_pci_bus_id[gpuindex]; @@ -685,30 +801,32 @@ int nvml_get_serial(nvml_handle *nvmlh, int cudaindex, char *sn, int maxlen) char uuid[NVML_DEVICE_UUID_BUFFER_SIZE]; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; nvmlReturn_t res; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (nvmlh->nvmlDeviceGetSerial) { + if(nvmlh->nvmlDeviceGetSerial) + { res = nvmlh->nvmlDeviceGetSerial(nvmlh->devs[gpuindex], sn, maxlen); - if (res == NVML_SUCCESS) + if(res == NVML_SUCCESS) return 0; } - if (!nvmlh->nvmlDeviceGetUUID) + if(!nvmlh->nvmlDeviceGetUUID) return -ENOSYS; // nvmlDeviceGetUUID: GPU-f2bd642c-369f-5a14-e0b4-0d22dfe9a1fc // use a part of uuid to generate an unique serial // todo: check if there is vendor id is inside memset(uuid, 0, sizeof(uuid)); - res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid)-1); - if (res != NVML_SUCCESS) { - if (opt_debug) + res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid) - 1); + if(res != NVML_SUCCESS) + { + if(opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetUUID: %s", nvmlh->nvmlErrorString(res)); return -1; } - strncpy(sn, &uuid[4], min((int) strlen(uuid), maxlen)); - sn[maxlen-1] = '\0'; + strncpy(sn, &uuid[4], min((int)strlen(uuid), maxlen)); + sn[maxlen - 1] = '\0'; return 0; } @@ -716,15 +834,16 @@ int nvml_get_bios(nvml_handle *nvmlh, int cudaindex, char *desc, int maxlen) { uint32_t subids = 0; 
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if (!nvmlh->nvmlDeviceGetVbiosVersion) + if(!nvmlh->nvmlDeviceGetVbiosVersion) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetVbiosVersion(nvmlh->devs[gpuindex], desc, maxlen); - if (res != NVML_SUCCESS) { - if (opt_debug) + if(res != NVML_SUCCESS) + { + if(opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetVbiosVersion: %s", nvmlh->nvmlErrorString(res)); return -1; } @@ -735,13 +854,15 @@ int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pi { uint32_t subids = 0; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; subids = nvmlh->nvml_pci_subsys_id[gpuindex]; - if (!subids) subids = nvmlh->nvml_pci_device_id[gpuindex]; + if(!subids) subids = nvmlh->nvml_pci_vendor_id[gpuindex]; pid = subids >> 16; vid = subids & 0xFFFF; + // Colorful and Inno3D + if(pid == 0) pid = nvmlh->nvml_pci_vendor_id[gpuindex] >> 16; return 0; } @@ -754,6 +875,7 @@ int nvml_destroy(nvml_handle *nvmlh) free(nvmlh->nvml_pci_bus_id); free(nvmlh->nvml_pci_device_id); free(nvmlh->nvml_pci_domain_id); + free(nvmlh->nvml_pci_vendor_id); free(nvmlh->nvml_pci_subsys_id); free(nvmlh->nvml_cuda_device_id); free(nvmlh->cuda_nvml_device_id); @@ -764,35 +886,41 @@ int nvml_destroy(nvml_handle *nvmlh) return 0; } +// ---------------------------------------------------------------------------- + /** - * nvapi alternative for windows x86 binaries - * nvml api doesn't exists as 32bit dll :/// - */ +* nvapi alternative for windows x86 binaries +* nvml api doesn't exists as 32bit dll :/// +*/ #ifdef WIN32 #include "nvapi/nvapi_ccminer.h" -static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; +static unsigned int nvapi_dev_map[MAX_GPUS] = {0}; +static 
NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = {0}; +static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = {0}; static NvU32 nvapi_dev_cnt = 0; +extern bool nvapi_dll_loaded; int nvapi_temperature(unsigned int devNum, unsigned int *temperature) { NvAPI_Status ret; - if (devNum >= nvapi_dev_cnt) + if(devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_THERMAL_SETTINGS thermal; thermal.version = NV_GPU_THERMAL_SETTINGS_VER; ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal); - if (ret != NVAPI_OK) { + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string); return -1; } - (*temperature) = (unsigned int) thermal.sensor[0].currentTemp; + (*temperature) = (unsigned int)thermal.sensor[0].currentTemp; return 0; } @@ -801,43 +929,46 @@ int nvapi_fanspeed(unsigned int devNum, unsigned int *speed) { NvAPI_Status ret; - if (devNum >= nvapi_dev_cnt) + if(devNum >= nvapi_dev_cnt) return -ENODEV; NvU32 fanspeed = 0; ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed); - if (ret != NVAPI_OK) { + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string); return -1; } - (*speed) = (unsigned int) fanspeed; + (*speed) = (unsigned int)fanspeed; return 0; } -int nvapi_getpstate(unsigned int devNum, unsigned int *power) +int nvapi_getpstate(unsigned int devNum, unsigned int *pstate) { NvAPI_Status ret; - if (devNum >= nvapi_dev_cnt) + if(devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */ ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate); - if (ret != NVAPI_OK) { + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "NVAPI 
NvAPI_GPU_GetCurrentPstate: %s", string); return -1; } - else { + else + { // get pstate for the moment... often 0 = P0 - (*power) = (unsigned int)CurrentPstate; + (*pstate) = (unsigned int)CurrentPstate; } return 0; @@ -848,21 +979,23 @@ int nvapi_getusage(unsigned int devNum, unsigned int *pct) { NvAPI_Status ret; - if (devNum >= nvapi_dev_cnt) + if(devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_DYNAMIC_PSTATES_INFO_EX info; info.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER; ret = NvAPI_GPU_GetDynamicPstatesInfoEx(phys[devNum], &info); - if (ret != NVAPI_OK) { + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "NVAPI GetDynamicPstatesInfoEx: %s", string); return -1; } - else { - if (info.utilization[UTIL_DOMAIN_GPU].bIsPresent) + else + { + if(info.utilization[UTIL_DOMAIN_GPU].bIsPresent) (*pct) = info.utilization[UTIL_DOMAIN_GPU].percentage; } @@ -874,23 +1007,27 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid) NvAPI_Status ret; NvU32 pDeviceId, pSubSystemId, pRevisionId, pExtDeviceId; - if (devNum >= nvapi_dev_cnt) + if(devNum >= nvapi_dev_cnt) return -ENODEV; ret = NvAPI_GPU_GetPCIIdentifiers(phys[devNum], &pDeviceId, &pSubSystemId, &pRevisionId, &pExtDeviceId); - if (ret != NVAPI_OK) { + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "NVAPI GetPCIIdentifiers: %s", string); return -1; } pid = pDeviceId >> 16; vid = pDeviceId & 0xFFFF; - if (vid == 0x10DE && pSubSystemId) { + if(vid == 0x10DE && pSubSystemId) + { vid = pSubSystemId & 0xFFFF; pid = pSubSystemId >> 16; + // Colorful and Inno3D + if(pid == 0) pid = pDeviceId >> 16; } return 0; @@ -898,497 +1035,1431 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid) int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen) { -// NvAPI_Status ret; - if (devNum >= nvapi_dev_cnt) 
+ NvAPI_Status ret; + if(devNum >= nvapi_dev_cnt) return -ENODEV; - sprintf(serial, ""); + memset(serial, 0, maxlen); - if (maxlen < 64) // Short String - return -1; + if(maxlen < 11) + return -EINVAL; -#if 0 - ret = NvAPI_GPU_Get..(phys[devNum], serial); - if (ret != NVAPI_OK) { + NvAPI_ShortString ser = {0}; + ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser); + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) - applog(LOG_DEBUG, "NVAPI ...: %s", string); + if(opt_debug) + applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string); return -1; } -#endif + + uint8_t *bytes = (uint8_t*)ser; + for(int n = 0; n<5; n++) sprintf(&serial[n * 2], "%02X", bytes[n]); return 0; } int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen) { NvAPI_Status ret; - if (devNum >= nvapi_dev_cnt) + if(devNum >= nvapi_dev_cnt) return -ENODEV; - if (maxlen < 64) // Short String + if(maxlen < 64) // Short String return -1; ret = NvAPI_GPU_GetVbiosVersionString(phys[devNum], desc); - if (ret != NVAPI_OK) { + if(ret != NVAPI_OK) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if (opt_debug) + if(opt_debug) applog(LOG_DEBUG, "NVAPI GetVbiosVersionString: %s", string); return -1; } return 0; } -uint8_t nvapi_get_plimit(unsigned int devNum) + +static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevState) { NvAPI_Status ret = NVAPI_OK; - NVAPI_GPU_POWER_STATUS pol = {0}; - pol.version = NVAPI_GPU_POWER_STATUS_VER; - if((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) + NV_I2C_INFO_EX* i2cInfo; + + int delay1 = 20000; + int delay2 = 0; + + uchar4 rgb = {0}; + memcpy(&rgb, &RGB, 4); + uchar4 prgb = {0}; + int32_t prev = device_led_state[nvapi_devid(devNum)]; + memcpy(&prgb, &prev, 4); + + NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); + if(i2cInfo == NULL) return -ENOMEM; + + NvU32 data[5] = {0}; + NvU32 datv[2] = {0, 1}; + NvU32 datw[2] = {1, 0}; + if(rgb.z != 
prgb.z || ignorePrevState) { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - if(opt_debug) - applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); - return 0; + data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo) + data[3] = 1; + datv[0] = rgb.z | 0x13384000; + + i2cInfo->i2cDevAddress = 0x52; + i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->regAddrSize = 1; + i2cInfo->pbData = (NvU8*)datv; + i2cInfo->cbRead = 5; + i2cInfo->cbSize = 1; + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); + usleep(delay1); + has_rgb_ok = (ret == NVAPI_OK); } - return (uint8_t)(pol.entries[0].power / 1000); // in percent + + if(rgb.y != prgb.y || ignorePrevState) + { + data[2] = 5; + data[3] = 1; + datv[0] = rgb.y | 0x4000; + + i2cInfo->i2cDevAddress = 0x52; + i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->regAddrSize = 1; + i2cInfo->pbData = (NvU8*)datv; + i2cInfo->cbRead = 5; + i2cInfo->cbSize = 1; + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); + usleep(delay1); + has_rgb_ok = (ret == NVAPI_OK); + } + + if(rgb.y != prgb.y || ignorePrevState) + { + data[2] = 6; + data[3] = 1; + datv[0] = rgb.x | 0x4000; + + i2cInfo->i2cDevAddress = 0x52; + i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->regAddrSize = 1; + i2cInfo->pbData = (NvU8*)datv; + i2cInfo->cbRead = 5; + i2cInfo->cbSize = 1; + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); + usleep(delay1); + has_rgb_ok = (ret == NVAPI_OK); + } + + if(rgb.w && ignorePrevState) + { + data[2] = 7; + data[3] = 1; + datv[0] = rgb.w | 0x4000; + + i2cInfo->i2cDevAddress = 0x52; + i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->regAddrSize = 1; + i2cInfo->pbData = (NvU8*)datv; + i2cInfo->cbRead = 5; + i2cInfo->cbSize = 1; + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + 
+ ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); + usleep(delay1); + has_rgb_ok = (ret == NVAPI_OK); + } + usleep(delay2); + free(i2cInfo); + return (int)ret; } -int nvapi_set_plimit(unsigned int devNum, uint16_t percent) +static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) { - NvAPI_Status ret = NVAPI_OK; - uint32_t val = percent * 1000; + NvAPI_Status ret; + NV_I2C_INFO_EX* i2cInfo; + NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); + if(i2cInfo == NULL) + return -ENOMEM; + + NvU32 readBuf[25] = {0}; + NvU32 data[5] = {0}; + data[0] = 1; + data[2] = swab32(RGB & 0xfcfcfcU) | 0x40; + + i2cInfo->i2cDevAddress = 0x48 << 1; + i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS + i2cInfo->pbData = (NvU8*)readBuf; + i2cInfo->cbRead = 2; + i2cInfo->cbSize = sizeof(readBuf); + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + //ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, data); + ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data); + usleep(20000); + free(i2cInfo); + return (int)ret; +} + +static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) +{ + NvAPI_Status ret; + NV_I2C_INFO* i2cInfo; + NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo); + if(i2cInfo == NULL) + return -ENOMEM; + + NvU32 buf[25] = {0}; + NvU32 data[5] = {0}; + + uint32_t color = 0, level = 0x40; + + uchar4 rgb = {0}; + memcpy(&rgb, &RGB, 4); + level = rgb.x & 0xF0; + level |= rgb.y & 0xF0; + level |= rgb.z & 0xF0; + //applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x); + + // Not really RGB custom, only some basic colors, so convert + // 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white + if((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6; + else if((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5; + else if((RGB & 0xFF00) && (RGB & 0xFF)) color = 3; + else if((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1; + else if(RGB & 0xFF) color = 4; + else if(RGB & 0xFF00) color = 2; + + 
buf[0] = 0xF0; // F0 set colors + buf[0] |= (color << 8); // logo + buf[0] |= (1 << 16); // top + if(RGB != 0) // level : 0x10 to 0xF0 + buf[0] |= (level << 24); + else + buf[0] |= (0x10U << 24); + + // todo: i2c data crc ? + + i2cInfo->displayMask = 1; + i2cInfo->bIsDDCPort = 1; + i2cInfo->i2cDevAddress = 0x48 << 1; + i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->regAddrSize = 1; + i2cInfo->pbData = (NvU8*)buf; + i2cInfo->cbSize = 4; + i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED; + i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4 + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); + // required to prevent i2c lock + usleep(20000); + +#if 0 + buf[0] = 0xF7; // F7 toggle leds + if(RGB == 0) + buf[0] |= (1 << 8); // 0 logo on, 1 off + buf[0] |= (1 << 16); // 1 top off + ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); + usleep(20000); +#endif + // other modes: + // 0xF1 breathing green (0x070202F1) + // 0xF2 strobe green (0x070202F2) + // 0xF3 cycle (0x000000F3) + + free(i2cInfo); + return (int)ret; +} - NVAPI_GPU_POWER_INFO nfo = {0}; - nfo.version = NVAPI_GPU_POWER_INFO_VER; - ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); - if(ret == NVAPI_OK) +int nvapi_set_led(unsigned int devNum, int RGB, char *device_name) +{ + uint16_t vid = 0, pid = 0; + NvAPI_Status ret; + if(strstr(device_name, "Gigabyte GTX 10")) { - if(val == 0) - val = nfo.entries[0].def_power; - else if(val < nfo.entries[0].min_power) - val = nfo.entries[0].min_power; - else if(val > nfo.entries[0].max_power) - val = nfo.entries[0].max_power; + if(opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int)phys[devNum], RGB); + return SetGigabyteRGBLogo(devNum, (uint32_t)RGB); } - - NVAPI_GPU_POWER_STATUS pol = {0}; - pol.version = NVAPI_GPU_POWER_STATUS_VER; - pol.flags = 1; - pol.entries[0].power = val; - if((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) + else if(strstr(device_name, 
"ASUS GTX 10")) { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); if(opt_debug) - applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); - return -1; + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int)phys[devNum], RGB); + return SetAsusRGBLogo(devNum, (uint32_t)RGB, !has_rgb_ok); + } + else if(strstr(device_name, "Zotac GTX 10")) + { + if(opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int)phys[devNum], RGB); + return SetZotacRGBLogo(devNum, (uint32_t)RGB); + } + else + { + NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu; + NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu); + illu->hPhysicalGpu = phys[devNum]; + illu->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; + ret = NvAPI_GPU_QueryIlluminationSupport(illu); + if(!ret && illu->bSupported) + { + NV_GPU_GET_ILLUMINATION_PARM *led; + NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led); + led->hPhysicalGpu = phys[devNum]; + led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; + NvAPI_GPU_GetIllumination(led); + if(opt_debug) + applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int)phys[devNum], led->Value, RGB); + led->Value = (uint32_t)RGB; + ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*)led); + free(led); + } + free(illu); + return ret; } - return ret; } -int nvapi_init() +int nvapi_pstateinfo(unsigned int devNum) { - int num_gpus = cuda_num_devices(); - NvAPI_Status ret = NvAPI_Initialize(); - if (!ret == NVAPI_OK){ - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - if (opt_debug) - applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); - return -1; - } + uint32_t n; + NvAPI_Status ret; + uint32_t* mem = (uint32_t*)calloc(1, 0x4000); + if(!mem) + return -ENOMEM; - ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); - if (ret != NVAPI_OK) { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - if (opt_debug) - applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); - return -1; - } + unsigned int 
current = 0xFF; + // useless on init but... + nvapi_getpstate(devNum, ¤t); - for (int g = 0; g < num_gpus; g++) { - cudaDeviceProp props; - if (cudaGetDeviceProperties(&props, g) == cudaSuccess) { - device_bus_ids[g] = props.pciBusID; - } - nvapi_dev_map[g] = g; // default mapping - } - - for (NvU8 i = 0; i < nvapi_dev_cnt; i++) { - NvAPI_ShortString name; - ret = NvAPI_GPU_GetFullName(phys[i], name); - if (ret == NVAPI_OK) { - for (int g = 0; g < num_gpus; g++) { - NvU32 busId; - ret = NvAPI_GPU_GetBusId(phys[i], &busId); - if (ret == NVAPI_OK && busId == device_bus_ids[g]) { - nvapi_dev_map[g] = i; - if (opt_debug) - applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u", - g, i, busId); - break; - } - } - } else { +#if 0 + // try :p + uint32_t* buf = (uint32_t*)calloc(1, 0x8000); + for(int i = 8; i < 0x8000 && buf; i += 4) + { + buf[0] = 0x10000 + i; + NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0; + ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf); + if(ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) + { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); + applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string); + for(int n = 0; n < i / 32; n++) + applog_hex(&buf[n*(32 / 4)], 32); + break; } } + free(buf); +#endif #if 0 - NvAPI_ShortString ver; - NvAPI_GetInterfaceVersionString(ver); - applog(LOG_DEBUG, "NVAPI Version: %s", ver); + // Unsure of the meaning of these values + NVAPI_GPU_POWER_TOPO topo = {0}; + topo.version = NVAPI_GPU_POWER_TOPO_VER; + if((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) + { + if(topo.count) + applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?", + (double)topo.entries[0].power / 1000, (double)topo.entries[1].power / 1000); + // Ok on 970, not pascal + NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = {0}; + pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2; + pset2.ov.numVoltages = 1; + 
pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv; + ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2); #endif - NvU32 udv; - NvAPI_ShortString str; - ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str); - if (ret == NVAPI_OK) { - sprintf(driver_version,"%d.%02d", udv / 100, udv % 100); - } + NV_GPU_PERF_PSTATES20_INFO* info; + NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem); + if((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if(opt_debug) + applog(LOG_RAW, "NVAPI GetPstates20: %s", string); + return -1; + } - return 0; -} -#endif + for(n = 0; n < info->numPstates; n++) + { + NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks; + applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d", + info->pstates[n].pstateId == current ? ">" : " ", (int)info->pstates[n].pstateId, + clocks[1].data.single.freq_kHz / 1000, clocks[1].bIsEditable ? "*" : " ", + (double)clocks[0].data.single.freq_kHz / 1000, clocks[0].bIsEditable ? "*" : " ", + info->pstates[n].baseVoltages[0].volt_uV / 1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*" : " ", + info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min / 1000, // range if editable + info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max / 1000); + if(clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) + { + applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz", + clocks[1].freqDelta_kHz.value / 1000, (double)clocks[0].freqDelta_kHz.value / 1000); + } + } + // boost over volting (GTX 9xx only ?) + for(n = 0; n < info->ov.numVoltages; n++) + { + applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d", + info->ov.voltages[n].volt_uV / 1000, info->ov.voltages[n].voltDelta_uV.value / 1000, info->ov.voltages[n].bIsEditable ? 
"*" : " ", + info->ov.voltages[n].voltDelta_uV.valueRange.min / 1000, info->ov.voltages[n].voltDelta_uV.valueRange.max / 1000); + } -/* api functions -------------------------------------- */ + NV_GPU_CLOCK_FREQUENCIES *freqs; + NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem); + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", + (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", + (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", + (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + // Other clock values ?? 
+ NVAPI_GPU_PERF_CLOCKS *pcl; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl); + int numClock = 0; ret = NVAPI_OK; + while(ret == NVAPI_OK) + { + if((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) + { + applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock, + (double)pcl->memFreq1 / 1000, (double)pcl->gpuFreq1 / 1000, (double)pcl->gpuFreqMin / 1000, (double)pcl->gpuFreqMax / 1000); + // ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error + } + numClock++; + } -// assume 2500 rpm as default, auto-updated if more -static unsigned int fan_speed_max = 2500; + // Pascal only + NVAPI_VOLTBOOST_PERCENT *pvb; + NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem); + if((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) + { + NVAPI_VOLTAGE_STATUS *pvdom; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom); + NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom); + if(pvdom && pvdom->value_uV) + applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV / 1000, pvb->percent); + else if(pvdom) + applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV / 1000); + free(pvdom); + } + else + { + // Maxwell 9xx + NVAPI_VOLT_STATUS *mvdom, *mvstep; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom); + if(mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) + { + NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep); + NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep); + if(mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution", + (double)mvdom->value_uV / 1000, (double)mvstep->value_uV / 1000); + free(mvstep); + } + free(mvdom); + } -unsigned int gpu_fanpercent(struct cgpu_info *gpu) -{ - unsigned int pct = 0; - if (hnvml) { - nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct); - } -#ifdef WIN32 - else { - unsigned int rpm = 0; - nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); - pct = (rpm * 100) / fan_speed_max; - if (pct > 100) { - pct = 
100; - fan_speed_max = rpm; + uint32_t plim = nvapi_get_plimit(devNum); + double min_pw = 0, max_pw = 0; // percent + + NVAPI_GPU_POWER_INFO nfo = {0}; + nfo.version = NVAPI_GPU_POWER_INFO_VER; + ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); + if(ret == NVAPI_OK && nfo.valid) + { + min_pw = (double)nfo.entries[0].min_power / 1000; + max_pw = (double)nfo.entries[0].max_power / 1000; + } + applog(LOG_RAW, " Power limit is set to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw); + +#if 0 + NVAPI_COOLER_SETTINGS *cooler; + NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem); + ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler); + if(ret == NVAPI_OK) + { + applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?) + NVAPI_COOLER_LEVEL *fan; + NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan); + fan->level = 100; + fan->count = 1; + ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan); + free(fan); + sleep(10); + ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7); } - } #endif - return pct; -} -unsigned int gpu_fanrpm(struct cgpu_info *gpu) -{ - unsigned int rpm = 0; -#ifdef WIN32 - nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); + NV_GPU_THERMAL_SETTINGS *tset; + NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem); + + NVAPI_GPU_THERMAL_INFO *tnfo; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo); + NVAPI_GPU_THERMAL_LIMIT *tlim; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim); + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset); + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo); + if((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) + { + applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", + tlim->entries[0].value >> 8, tset->sensor[0].currentTemp, + tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8); + } + free(tnfo); + free(tlim); + +#if 1 + // Read pascal Clocks Table, Empty on 9xx + 
//NVAPI_CLOCKS_RANGE* ranges; + //NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem); + //ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges); + + NVAPI_CLOCK_MASKS* boost; + NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem); + ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost); + int gpuClocks = 0, memClocks = 0; + for(n = 0; n < 80 + 23; n++) + { + if(boost->clocks[n].memDelta) memClocks++; + if(boost->clocks[n].gpuDelta) gpuClocks++; + } + + // PASCAL GTX ONLY + if(gpuClocks || memClocks) + { + NVAPI_CLOCK_TABLE *table; + NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table); + memcpy(table->mask, boost->mask, 12); + ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table); + gpuClocks = 0, memClocks = 0; + for(n = 0; n < 12; n++) + { + if(table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]); + } + for(n = 0; n < 80; n++) + { + if(table->gpuDeltas[n].freqDelta) + { + // note: gpu delta value seems to be x2, not the memory + //applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000); + gpuClocks++; + } + } + for(n = 0; n < 23; n++) + { + if(table->memFilled[n]) + { + //applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000); + memClocks++; + } + } + for(n = 0; n < 1529; n++) + { + if(table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]); + } + applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + free(table); + + NVAPI_VFP_CURVE *curve; + NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve); + memcpy(curve->mask, boost->mask, 12); + ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve); + gpuClocks = 0, memClocks = 0; + for(n = 0; n < 80; n++) + { + if(curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) + { + // applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000); + gpuClocks++; + } + } + for(n = 0; n < 
23; n++) + { + if(curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) + { + // applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000); + memClocks++; + } + } + for(n = 0; n < 1064; n++) + { + if(curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]); + } + applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + free(curve); + } + + // Maxwell + else + { + NVAPI_VOLTAGES_TABLE* volts; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts); + int entries = 0; + ret = NvAPI_DLL_GetVoltages(phys[devNum], volts); + for(n = 0; n < 128; n++) + { + if(volts->entries[n].volt_uV) + entries++; + } + applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); + free(volts); + } + + NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo; + NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem); + meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; + if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) + { + applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory / 1024, + (double)(meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory) / 1024); + } +#if 0 /* some undetermined stats */ + NVAPI_GPU_PERF_INFO pi = {0}; + pi.version = NVAPI_GPU_PERF_INFO_VER; + ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi); + NVAPI_GPU_PERF_STATUS ps = {0}; + ps.version = NVAPI_GPU_PERF_STATUS_VER; + ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps); + applog(LOG_BLUE, "%llx %lld. %lld. 
%llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]); #endif - return rpm; -} +#endif + free(mem); + return 0; + } -float gpu_temp(struct cgpu_info *gpu) -{ - float tc = 0.0; - unsigned int tmp = 0; - if (hnvml) { - nvml_get_tempC(hnvml, gpu->gpu_id, &tmp); - tc = (float)tmp; + // workaround for buggy driver 378.49 + unsigned int nvapi_get_gpu_clock(unsigned int devNum) + { + NvAPI_Status ret = NVAPI_OK; + unsigned int freq = 0; + NV_GPU_CLOCK_FREQUENCIES *freqs; + NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs); + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + if(ret == NVAPI_OK) + { + freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000; + } + free(freqs); + return freq; // in MHz } -#ifdef WIN32 - else { - nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp); - tc = (float)tmp; + + uint8_t nvapi_get_plimit(unsigned int devNum) + { + NvAPI_Status ret = NVAPI_OK; + NVAPI_GPU_POWER_STATUS pol = {0}; + pol.version = NVAPI_GPU_POWER_STATUS_VER; + if((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if(opt_debug) + applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); + return 0; + } + return (uint8_t)(pol.entries[0].power / 1000); // in percent } -#endif - return tc; -} -int gpu_pstate(struct cgpu_info *gpu) -{ - int pstate = -1; - int support = -1; - if (hnvml) { - support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate); + int nvapi_set_plimit(unsigned int devNum, uint16_t percent) + { + NvAPI_Status ret = NVAPI_OK; + uint32_t val = percent * 1000; + + NVAPI_GPU_POWER_INFO nfo = {0}; + nfo.version = NVAPI_GPU_POWER_INFO_VER; + ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); + if(ret == NVAPI_OK) + { + if(val == 0) + val = nfo.entries[0].def_power; + else if(val < nfo.entries[0].min_power) + val = 
nfo.entries[0].min_power; + else if(val > nfo.entries[0].max_power) + val = nfo.entries[0].max_power; + } + + NVAPI_GPU_POWER_STATUS pol = {0}; + pol.version = NVAPI_GPU_POWER_STATUS_VER; + pol.flags = 1; + pol.entries[0].power = val; + if((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if(opt_debug) + applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); + return -1; + } + return ret; } -#ifdef WIN32 - if (support == -1) { - unsigned int pst = 0; - nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst); - pstate = (int) pst; + + int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) + { + NvAPI_Status ret; + uint32_t val = limit; + + if(devNum >= nvapi_dev_cnt) + return -ENODEV; + + NV_GPU_THERMAL_SETTINGS tset = {0}; + NVAPI_GPU_THERMAL_INFO tnfo = {0}; + NVAPI_GPU_THERMAL_LIMIT tlim = {0}; + tset.version = NV_GPU_THERMAL_SETTINGS_VER; + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); + tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); + tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; + if((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) + { + tlim.entries[0].value = val << 8; + tlim.flags = 1; + ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); + if(ret == NVAPI_OK) + { + applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", + devNum, val, tset.sensor[0].currentTemp, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } + else + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } + } + return (int)ret; } + + int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) + { + NvAPI_Status ret; + NvS32 delta = 0; + + if(devNum >= 
nvapi_dev_cnt) + return -ENODEV; +#if 0 + // wrong api to get default base clock when modified, cuda props seems fine + NV_GPU_CLOCK_FREQUENCIES freqs = {0}; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); + if(ret == NVAPI_OK) + { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; + } + NV_GPU_PERF_PSTATES_INFO deffreqs = {0}; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! + if(ret == NVAPI_OK) + { + if(deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq * 2; + } #endif - return pstate; -} -int gpu_busid(struct cgpu_info *gpu) -{ - int busid = -1; - int support = -1; - if (hnvml) { - support = nvml_get_busid(hnvml, gpu->gpu_id, &busid); - } -#ifdef WIN32 - if (support == -1) { - busid = device_bus_ids[gpu->gpu_id]; + cudaDeviceProp props = {0}; + NvU32 busId = 0xFFFF; + ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); + for(int d = 0; d < (int)nvapi_dev_cnt; d++) + { + // unsure about devNum, so be safe + cudaGetDeviceProperties(&props, d); + if(props.pciBusID == busId) + { + delta = (clock * 1000) - props.clockRate; + break; + } + } + + if(delta == (clock * 1000)) + return ret; + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = {0}; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + // Ok on both 1080 and 970 + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if(ret == NVAPI_OK) + { + applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta / 1000); + } + return ret; } -#endif - return busid; -} -unsigned int gpu_power(struct 
cgpu_info *gpu) -{ - unsigned int mw = 0; - int support = -1; - if(hnvml) + int nvapi_set_memclock(unsigned int devNum, uint32_t clock) { - support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw); + NvAPI_Status ret; + NvS32 delta = 0; + + if(devNum >= nvapi_dev_cnt) + return -ENODEV; + + // wrong to get default base clock (when modified) on maxwell (same as cuda props one) + NV_GPU_CLOCK_FREQUENCIES freqs = {0}; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless + if(ret == NVAPI_OK) + { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; + } + + // seems ok on maxwell and pascal for the mem clocks + NV_GPU_PERF_PSTATES_INFO deffreqs = {0}; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks + if(ret == NVAPI_OK) + { + if(deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; + } + + if(delta == (clock * 1000)) + return ret; + + // todo: bounds check with GetPstates20 + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = {0}; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if(ret == NVAPI_OK) + { + applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta / 1000); + } + return ret; } -#ifdef WIN32 - if(support == -1) + + static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log = true) { - unsigned int pct = 0; - nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); - pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); - pct /= 100; - mw = pct; 
// to fix + NvAPI_Status ret; + NvS32 deltaKHz = delta * 1000; + + if(devNum >= nvapi_dev_cnt) + return -ENODEV; + + // todo: bounds check with GetPstates20 + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = {0}; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if(ret == NVAPI_OK) + { + if(log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000); + need_memclockrst = true; + } + return ret; } -#endif - if(gpu->gpu_power > 0) + + // Replacement for WIN32 CUDA 6.5 on pascal + int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) { - // average - mw = (gpu->gpu_power + mw) / 2; + NvAPI_Status ret = NVAPI_OK; + NV_DISPLAY_DRIVER_MEMORY_INFO mem = {0}; + mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; + unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; + if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) + { + *total = (uint64_t)mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; + *free = (uint64_t)mem.curAvailableDedicatedVideoMemory; + } + return (int)ret; } - return mw; -} -int nvml_set_plimit(nvml_handle *nvmlh, int dev_id) -{ - nvmlReturn_t rc = NVML_ERROR_UNKNOWN; - uint32_t gpu_clk = 0, mem_clk = 0; - int n = nvmlh->cuda_nvml_device_id[dev_id]; - if(n < 0 || n >= nvmlh->nvml_gpucount) - return -ENODEV; + int nvapi_init() + { + int num_gpus = cuda_num_devices(); + NvAPI_Status ret = NvAPI_Initialize(); + if(ret != NVAPI_OK) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if(opt_debug) + applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); + return -1; + } - if(!device_plimit[dev_id]) - return 0; // nothing to do + ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); + if(ret != NVAPI_OK) + { + NvAPI_ShortString string; + 
NvAPI_GetErrorMessage(ret, string); + if(opt_debug) + applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); + return -1; + } - if(!nvmlh->nvmlDeviceSetPowerManagementLimit) - return -ENOSYS; + for(int g = 0; g < num_gpus; g++) + { + cudaDeviceProp props; + if(cudaGetDeviceProperties(&props, g) == cudaSuccess) + { + device_bus_ids[g] = props.pciBusID; + } + nvapi_dev_map[g] = g; // default mapping + } - uint32_t plimit = device_plimit[dev_id] * 1000; - uint32_t pmin = 1000, pmax = 0, prev_limit = 0; - if(nvmlh->nvmlDeviceGetPowerManagementLimitConstraints) - rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax); + for(NvU8 i = 0; i < nvapi_dev_cnt; i++) + { + NvAPI_ShortString name; + ret = NvAPI_GPU_GetFullName(phys[i], name); + if(ret == NVAPI_OK) + { + for(int g = 0; g < num_gpus; g++) + { + NvU32 busId; + ret = NvAPI_GPU_GetBusId(phys[i], &busId); + if(ret == NVAPI_OK && busId == device_bus_ids[g]) + { + nvapi_dev_map[g] = i; + if(opt_debug) + applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u", + g, i, busId); + break; + } + } + } + else + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); + } + } +#if 0 + if(opt_debug) + { + NvAPI_ShortString ver; + NvAPI_GetInterfaceVersionString(ver); + applog(LOG_DEBUG, "%s", ver); + } +#endif - if(rc != NVML_SUCCESS) + NvU32 udv; + NvAPI_ShortString str; + ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str); + if(ret == NVAPI_OK) + { + sprintf(driver_version, "%d.%02d", udv / 100, udv % 100); + } + + return 0; + } + + int nvapi_init_settings() { - if(!nvmlh->nvmlDeviceGetPowerManagementLimit) - return -ENOSYS; + // nvapi.dll + int ret = nvapi_dll_init(); + if(ret != NVAPI_OK) + return ret; + + if(!opt_n_threads) + { + opt_n_threads = active_gpus; + } + + for(int n = 0; n < opt_n_threads; n++) + { + int dev_id = device_map[n % MAX_GPUS]; + if(device_plimit[dev_id] && !nvml_plimit_set) + { 
+ if(nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) + { + uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]); + gpulog(LOG_INFO, n, "Power limit is set to %u%%", res); + } + } + if(device_tlimit[dev_id]) + { + nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]); + } + if(device_gpu_clocks[dev_id]) + { + ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]); + if(ret) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status)ret, string); + gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string); + } + } + if(device_mem_offsets[dev_id]) + { + ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]); + if(ret) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status)ret, string); + gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string); + } + } + else if(device_mem_clocks[dev_id]) + { + ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]); + if(ret) + { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status)ret, string); + gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string); + } + } + if(device_pstate[dev_id]) + { + // dunno how via nvapi or/and pascal + } + if(device_led[dev_id] != -1) + { + int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]); + if(err != 0) + { + gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err); + } + device_led_state[dev_id] = device_led[dev_id]; + } + } + + return ret; } - nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit); - if(!pmax) pmax = prev_limit; - plimit = min(plimit, pmax); - plimit = max(plimit, pmin); - rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); - if(rc != NVML_SUCCESS) + void nvapi_toggle_clocks(int thr_id, bool enable) { -#ifndef WIN32 - applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc)); -#endif - return -1; + int dev_id = device_map[thr_id % MAX_GPUS]; + 
if(device_mem_offsets[dev_id]) + { + nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false); + } } - else + + unsigned int nvapi_devnum(int dev_id) { - device_plimit[dev_id] = plimit / 1000; - nvml_plimit_set = true; + return nvapi_dev_map[dev_id]; } - if(!opt_quiet) + int nvapi_devid(unsigned int devNum) { - applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)", - dev_id, plimit / 1000U, pmin / 1000U, pmax / 1000U); + for(int i = 0; i < opt_n_threads; i++) + { + int dev_id = device_map[i % MAX_GPUS]; + if(nvapi_dev_map[dev_id] == devNum) + return dev_id; + } + return 0; } - limit_prev[dev_id] = prev_limit; - return 1; -} +#endif /* WIN32 : Windows specific (nvapi) */ -uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id) -{ - uint32_t plimit = 0; - int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1; - if(n < 0 || n >= nvmlh->nvml_gpucount) - return 0; + /* api functions -------------------------------------- */ - if(nvmlh->nvmlDeviceGetPowerManagementLimit) + // assume 2500 rpm as default, auto-updated if more + static unsigned int fan_speed_max = 2500; + + unsigned int gpu_fanpercent(struct cgpu_info *gpu) { - nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit); + unsigned int pct = 0; + if(hnvml) + { + nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct); + } +#ifdef WIN32 + else + { + unsigned int rpm = 0; + nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); + pct = (rpm * 100) / fan_speed_max; + if(pct > 100) + { + pct = 100; + fan_speed_max = rpm; + } + } +#endif + return pct; } - return plimit; -} -unsigned int gpu_plimit(struct cgpu_info *gpu) -{ - unsigned int mw = 0; - int support = -1; - if(hnvml) + unsigned int gpu_fanrpm(struct cgpu_info *gpu) { - mw = nvml_get_plimit(hnvml, gpu->gpu_id); - support = (mw > 0); - } + unsigned int rpm = 0; #ifdef WIN32 + 
nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); #endif - return mw; -} + return rpm; + } -static int translate_vendor_id(uint16_t vid, char *vendorname) -{ - struct VENDORS { - const uint16_t vid; - const char *name; - } vendors[] = { - { 0x1043, "ASUS" }, - { 0x107D, "Leadtek" }, - { 0x10B0, "Gainward" }, - // { 0x10DE, "NVIDIA" }, - { 0x1458, "Gigabyte" }, - { 0x1462, "MSI" }, - { 0x154B, "PNY" }, - { 0x1682, "XFX" }, - { 0x196D, "Club3D" }, - { 0x19DA, "Zotac" }, - { 0x19F1, "BFG" }, - { 0x1ACC, "PoV" }, - { 0x1B4C, "KFA2" }, - { 0x3842, "EVGA" }, - { 0x7377, "Colorful" }, - { 0, "" } - }; - - if (!vendorname) - return -EINVAL; - for(int v=0; v < ARRAY_SIZE(vendors); v++) { - if (vid == vendors[v].vid) { - strcpy(vendorname, vendors[v].name); - return vid; + float gpu_temp(struct cgpu_info *gpu) + { + float tc = 0.0; + unsigned int tmp = 0; + if(hnvml) + { + nvml_get_tempC(hnvml, gpu->gpu_id, &tmp); + tc = (float)tmp; + } +#ifdef WIN32 + else + { + nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp); + tc = (float)tmp; } +#endif + return tc; } - if (opt_debug && vid != 0x10DE) - applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid); - return 0; -} -#ifdef HAVE_PCIDEV -extern "C" { -#include -} -static int linux_gpu_vendor(uint8_t pci_bus_id, char* vendorname, uint16_t &pid) -{ - uint16_t subvendor = 0; - struct pci_access *pci; - struct pci_dev *dev; - uint16_t subdevice; - - if (!vendorname) - return -EINVAL; + int gpu_pstate(struct cgpu_info *gpu) + { + int pstate = -1; + int support = -1; + if(hnvml) + { + support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate); + } +#ifdef WIN32 + if(support == -1) + { + unsigned int pst = 0; + nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst); + pstate = (int)pst; + } +#endif + return pstate; + } - pci = pci_alloc(); - if (!pci) - return -ENODEV; + int gpu_busid(struct cgpu_info *gpu) + { + int busid = -1; + int support = -1; + if(hnvml) + { + support = nvml_get_busid(hnvml, gpu->gpu_id, &busid); + } +#ifdef WIN32 + 
if(support == -1) + { + busid = device_bus_ids[gpu->gpu_id]; + } +#endif + return busid; + } - pci_init(pci); - pci_scan_bus(pci); + unsigned int gpu_power(struct cgpu_info *gpu) + { + unsigned int mw = 0; + int support = -1; + if(hnvml) + { + support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw); + } +#ifdef WIN32 + if(support == -1) + { + unsigned int pct = 0; + nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); + pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); + pct /= 100; + mw = pct; // to fix + } +#endif + if(gpu->gpu_power > 0) + { + // average + mw = (gpu->gpu_power + mw) / 2; + } + return mw; + } - for(dev = pci->devices; dev; dev = dev->next) + unsigned int gpu_plimit(struct cgpu_info *gpu) { - if (dev->bus == pci_bus_id && dev->vendor_id == 0x10DE) + unsigned int mw = 0; + int support = -1; + if(hnvml) { - if (!(dev->known_fields & PCI_FILL_CLASS)) - pci_fill_info(dev, PCI_FILL_CLASS); - if (dev->device_class != PCI_CLASS_DISPLAY_VGA) - continue; - subvendor = pci_read_word(dev, PCI_SUBSYSTEM_VENDOR_ID); - subdevice = pci_read_word(dev, PCI_SUBSYSTEM_ID); // model + mw = nvml_get_plimit(hnvml, gpu->gpu_id); + support = (mw > 0); + } +#ifdef WIN32 + // NVAPI value is in % (< 100 so) + if(support == -1) + { + mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); + } +#endif + return mw; + } - translate_vendor_id(subvendor, vendorname); + static int translate_vendor_id(uint16_t vid, char *vendorname) + { + struct VENDORS + { + const uint16_t vid; + const char *name; + } vendors[] = { + {0x1043, "ASUS"}, + {0x1048, "Elsa"}, + {0x107D, "Leadtek"}, + {0x10B0, "Gainward"}, + // { 0x10DE, "NVIDIA" }, + {0x1458, "Gigabyte"}, + {0x1462, "MSI"}, + {0x154B, "PNY"}, // maybe storage devices + {0x1569, "Palit"}, + {0x1682, "XFX"}, + {0x196D, "Club3D"}, + {0x196E, "PNY"}, + {0x19DA, "Zotac"}, + {0x19F1, "BFG"}, + {0x1ACC, "PoV"}, + {0x1B4C, "Galax"}, // KFA2 in EU, to check on Pascal cards + {0x3842, "EVGA"}, + {0x7377, "Colorful"}, + {0, ""} + }; + + 
if(!vendorname) + return -EINVAL; + + for(int v = 0; v < ARRAY_SIZE(vendors); v++) + { + if(vid == vendors[v].vid) + { + strcpy(vendorname, vendors[v].name); + return vid; + } } + if(opt_debug && vid != 0x10DE) + applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid); + return 0; } - pci_cleanup(pci); - return (int) subvendor; -} -#endif -int gpu_vendor(uint8_t pci_bus_id, char *vendorname) -{ -#ifdef HAVE_PCIDEV - uint16_t pid = 0; - return linux_gpu_vendor(pci_bus_id, vendorname, pid); -#else - uint16_t vid = 0, pid = 0; - if (hnvml) { // may not be initialized on start... - for (int id=0; id < hnvml->nvml_gpucount; id++) { - if (hnvml->nvml_pci_bus_id[id] == pci_bus_id) { - int dev_id = hnvml->nvml_cuda_device_id[id]; - nvml_get_info(hnvml, dev_id, vid, pid); + int gpu_vendor(uint8_t pci_bus_id, char *vendorname) + { + uint16_t vid = 0, pid = 0; + if(hnvml) + { // may not be initialized on start... + for(int id = 0; id < hnvml->nvml_gpucount; id++) + { + if(hnvml->nvml_pci_bus_id[id] == pci_bus_id) + { + int dev_id = hnvml->nvml_cuda_device_id[id]; + nvml_get_info(hnvml, dev_id, vid, pid); + } } } - } else { + else + { #ifdef WIN32 - for (unsigned id = 0; id < nvapi_dev_cnt; id++) { - if (device_bus_ids[id] == pci_bus_id) { - nvapi_getinfo(nvapi_dev_map[id], vid, pid); - break; + for(unsigned id = 0; id < nvapi_dev_cnt; id++) + { + if(device_bus_ids[id] == pci_bus_id) + { + nvapi_getinfo(nvapi_dev_map[id], vid, pid); + break; + } } - } #endif + } + return translate_vendor_id(vid, vendorname); } - return translate_vendor_id(vid, vendorname); -#endif -} -int gpu_info(struct cgpu_info *gpu) -{ - char vendorname[32] = { 0 }; - int id = gpu->gpu_id; - uint8_t bus_id = 0; + int gpu_info(struct cgpu_info *gpu) + { + char vendorname[32] = {0}; + int id = gpu->gpu_id; + uint8_t bus_id = 0; - gpu->nvml_id = -1; - gpu->nvapi_id = -1; + gpu->nvml_id = -1; + gpu->nvapi_id = -1; - if (id < 0) - return -1; + if(id < 0) + return -1; - if (hnvml) { - gpu->nvml_id = (int8_t) 
hnvml->cuda_nvml_device_id[id]; -#ifdef HAVE_PCIDEV - gpu->gpu_vid = linux_gpu_vendor(hnvml->nvml_pci_bus_id[id], vendorname, gpu->gpu_pid); - if (!gpu->gpu_vid || !gpu->gpu_pid) + if(hnvml) + { + gpu->nvml_id = (int8_t)hnvml->cuda_nvml_device_id[id]; nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid); -#else - nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid); + nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn)); + nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc)); + } +#ifdef WIN32 + gpu->nvapi_id = (int8_t)nvapi_dev_map[id]; + nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid); + nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, sizeof(gpu->gpu_sn)); + nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc)); +#endif + return 0; + } + +#endif /* USE_WRAPNVML */ + + static int rgb_percent(int RGB, int percent) + { + uint8_t* comp = (uint8_t*)&RGB; + int res = ((percent*comp[2]) / 100) << 16; + res += ((percent*comp[1]) / 100) << 8; + return res + ((percent*comp[0]) / 100); + } + + void gpu_led_on(int dev_id) + { +#if defined(WIN32) && defined(USE_WRAPNVML) + int value = device_led[dev_id]; + if(device_led_state[dev_id] != value) + { + if(nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) + device_led_state[dev_id] = value; + } #endif - nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn)); - nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc)); } + + void gpu_led_percent(int dev_id, int percent) + { +#if defined(WIN32) && defined(USE_WRAPNVML) + int value = rgb_percent(device_led[dev_id], percent); + if(device_led_state[dev_id] != value) + { + if(nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) + device_led_state[dev_id] = value; + } +#endif + } + + void gpu_led_off(int dev_id) + { +#if defined(WIN32) && defined(USE_WRAPNVML) + if(device_led_state[dev_id]) + { + if(nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0) + 
device_led_state[dev_id] = 0; + } +#endif + } + +#ifdef USE_WRAPNVML + extern double thr_hashrates[MAX_GPUS]; + extern bool opt_debug_threads; + extern bool opt_hwmonitor; + extern int num_cpus; + + void *monitor_thread(void *userdata) + { + int thr_id = -1; + + while(!abort_flag && !opt_quiet) + { + // This thread monitors card's power lazily during scans, one at a time... + thr_id = (thr_id + 1) % opt_n_threads; + struct cgpu_info *cgpu = &thr_info[thr_id].gpu; + int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id); + + if(hnvml != NULL && cgpu) + { + char khw[32] = {0}; + uint64_t clock = 0, mem_clock = 0; + uint32_t fanpercent = 0, power = 0; + double tempC = 0, khs_per_watt = 0; + uint32_t counter = 0; + int max_loops = 1000; + + pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock); + + do + { + unsigned int tmp_clock = 0, tmp_memclock = 0; + nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock); #ifdef WIN32 - gpu->nvapi_id = (int8_t) nvapi_dev_map[id]; - nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid); - nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, sizeof(gpu->gpu_sn)); - nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc)); + if(tmp_clock < 200) + { + // workaround for buggy drivers 378.x (real clock) + tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]); + } #endif - return 0; -} + if(tmp_clock < 200) + { + // some older cards only report a base clock with cuda props. 
+ if(cuda_gpu_info(cgpu) == 0) + { + tmp_clock = cgpu->gpu_clock / 1000; + tmp_memclock = cgpu->gpu_memclock / 1000; + } + } + clock += tmp_clock; + mem_clock += tmp_memclock; + tempC += gpu_temp(cgpu); + fanpercent += gpu_fanpercent(cgpu); + power += gpu_power(cgpu); + counter++; + + usleep(50000); + if(abort_flag) goto abort; + + } while(cgpu->monitor.sampling_flag && (--max_loops)); + + cgpu->monitor.gpu_temp = (uint32_t)(tempC / counter); + cgpu->monitor.gpu_fan = fanpercent / counter; + cgpu->monitor.gpu_power = power / counter; + cgpu->monitor.gpu_clock = (uint32_t)(clock / counter); + cgpu->monitor.gpu_memclock = (uint32_t)(mem_clock / counter); + + if(power) + { + khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]); + khs_per_watt = khs_per_watt / ((double)power / counter); + format_hashrate(khs_per_watt * 1000, khw); + if(strlen(khw)) + sprintf(&khw[strlen(khw) - 1], "W %uW ", cgpu->monitor.gpu_power / 1000); + } -#endif /* USE_WRAPNVML */ + if(opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) + { + gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%", + cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/, + khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan + ); + cgpu->monitor.tm_displayed = (uint32_t)time(NULL); + } + + pthread_mutex_unlock(&cgpu->monitor.lock); + } + usleep(500); // safety + } +abort: + if(opt_debug_threads) + applog(LOG_DEBUG, "%s() died", __func__); + return NULL; + } +#endif \ No newline at end of file diff --git a/nvml.h b/nvml.h index 71ff20b0..96547f18 100644 --- a/nvml.h +++ b/nvml.h @@ -1,27 +1,32 @@ /* - * A trivial little dlopen()-based wrapper library for the - * NVIDIA NVML library, to allow runtime discovery of NVML on an - * arbitrary system. This is all very hackish and simple-minded, but - * it serves my immediate needs in the short term until NVIDIA provides - * a static NVML wrapper library themselves, hopefully in - * CUDA 6.5 or maybe sometime shortly after. 
- * - * This trivial code is made available under the "new" 3-clause BSD license, - * and/or any of the GPL licenses you prefer. - * Feel free to use the code and modify as you see fit. - * - * John E. Stone - john.stone@gmail.com - * - */ +* A trivial little dlopen()-based wrapper library for the +* NVIDIA NVML library, to allow runtime discovery of NVML on an +* arbitrary system. This is all very hackish and simple-minded, but +* it serves my immediate needs in the short term until NVIDIA provides +* a static NVML wrapper library themselves, hopefully in +* CUDA 6.5 or maybe sometime shortly after. +* +* This trivial code is made available under the "new" 3-clause BSD license, +* and/or any of the GPL licenses you prefer. +* Feel free to use the code and modify as you see fit. +* +* John E. Stone - john.stone@gmail.com +* +*/ #ifdef USE_WRAPNVML #include "miner.h" +void *monitor_thread(void *userdata); + typedef void * nvmlDevice_t; +#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 16 + /* our own version of the PCI info struct */ -typedef struct { - char bus_id_str[16]; /* string form of bus info */ +typedef struct +{ + char bus_id_str[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; unsigned int domain; unsigned int bus; unsigned int device; @@ -33,19 +38,22 @@ typedef struct { unsigned int res3; } nvmlPciInfo_t; -enum nvmlEnableState_t { +enum nvmlEnableState_t +{ NVML_FEATURE_DISABLED = 0, NVML_FEATURE_ENABLED = 1, NVML_FEATURE_UNKNOWN = 2 }; -enum nvmlRestrictedAPI_t { +enum nvmlRestrictedAPI_t +{ NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0, - NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, + NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, // not for GTX cards NVML_RESTRICTED_API_COUNT = 2 }; -enum nvmlReturn_t { +enum nvmlReturn_t +{ NVML_SUCCESS = 0, NVML_ERROR_UNINITIALIZED = 1, NVML_ERROR_INVALID_ARGUMENT = 2, @@ -57,22 +65,45 @@ enum nvmlReturn_t { NVML_ERROR_INSUFFICIENT_POWER = 8, NVML_ERROR_DRIVER_NOT_LOADED = 9, NVML_ERROR_TIMEOUT = 10, + NVML_ERROR_IRQ_ISSUE = 
11, + NVML_ERROR_LIBRARY_NOT_FOUND = 12, + NVML_ERROR_FUNCTION_NOT_FOUND = 13, + NVML_ERROR_CORRUPTED_INFOROM = 14, + NVML_ERROR_GPU_IS_LOST = 15, + NVML_ERROR_RESET_REQUIRED = 16, + NVML_ERROR_OPERATING_SYSTEM = 17, + NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, + NVML_ERROR_IN_USE = 19, NVML_ERROR_UNKNOWN = 999 }; -enum nvmlClockType_t { +enum nvmlClockType_t +{ NVML_CLOCK_GRAPHICS = 0, NVML_CLOCK_SM = 1, - NVML_CLOCK_MEM = 2 + NVML_CLOCK_MEM = 2, + NVML_CLOCK_VIDEO = 3, + NVML_CLOCK_COUNT +}; + +enum nvmlClockId_t +{ + NVML_CLOCK_ID_CURRENT = 0, + NVML_CLOCK_ID_APP_CLOCK_TARGET = 1, + NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2, + NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3, + NVML_CLOCK_ID_COUNT }; -enum nvmlPcieUtilCounter_t { +enum nvmlPcieUtilCounter_t +{ NVML_PCIE_UTIL_TX_BYTES = 0, NVML_PCIE_UTIL_RX_BYTES = 1, NVML_PCIE_UTIL_COUNT }; -enum nvmlValueType_t { +enum nvmlValueType_t +{ NVML_VALUE_TYPE_DOUBLE = 0, NVML_VALUE_TYPE_UNSIGNED_INT = 1, NVML_VALUE_TYPE_UNSIGNED_LONG = 2, @@ -80,77 +111,104 @@ enum nvmlValueType_t { NVML_VALUE_TYPE_COUNT }; +typedef int nvmlGpuTopologyLevel_t; +typedef int nvmlNvLinkCapability_t; +typedef int nvmlNvLinkErrorCounter_t; +typedef int nvmlNvLinkUtilizationControl_t; + #define NVML_DEVICE_SERIAL_BUFFER_SIZE 30 #define NVML_DEVICE_UUID_BUFFER_SIZE 80 #define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32 /* - * Handle to hold the function pointers for the entry points we need, - * and the shared library itself. - */ -typedef struct { +* Handle to hold the function pointers for the entry points we need, +* and the shared library itself. 
+*/ +typedef struct +{ void *nvml_dll; int nvml_gpucount; int cuda_gpucount; unsigned int *nvml_pci_domain_id; unsigned int *nvml_pci_bus_id; unsigned int *nvml_pci_device_id; + unsigned int *nvml_pci_vendor_id; unsigned int *nvml_pci_subsys_id; int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */ int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */ nvmlDevice_t *devs; nvmlEnableState_t *app_clocks; - nvmlReturn_t (*nvmlInit)(void); - nvmlReturn_t (*nvmlDeviceGetCount)(int *); - nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, nvmlDevice_t *); - nvmlReturn_t (*nvmlDeviceGetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *); - nvmlReturn_t (*nvmlDeviceSetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t); - nvmlReturn_t (*nvmlDeviceGetDefaultApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *); - nvmlReturn_t (*nvmlDeviceGetApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *); - nvmlReturn_t (*nvmlDeviceSetApplicationsClocks)(nvmlDevice_t, unsigned int, unsigned int); - nvmlReturn_t (*nvmlDeviceResetApplicationsClocks)(nvmlDevice_t); - nvmlReturn_t (*nvmlDeviceGetSupportedGraphicsClocks)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *arr); - nvmlReturn_t (*nvmlDeviceGetSupportedMemoryClocks)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz); - nvmlReturn_t (*nvmlDeviceGetClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *); - nvmlReturn_t (*nvmlDeviceGetMaxClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *); - nvmlReturn_t (*nvmlDeviceGetPowerManagementDefaultLimit)(nvmlDevice_t, unsigned int *limit); - nvmlReturn_t (*nvmlDeviceGetPowerManagementLimit)(nvmlDevice_t, unsigned int *limit); - nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitConstraints)(nvmlDevice_t, unsigned int *min, unsigned int *max); - nvmlReturn_t (*nvmlDeviceSetPowerManagementLimit)(nvmlDevice_t device, unsigned int limit); - nvmlReturn_t (*nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t *); - 
nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen); - nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkWidth)(nvmlDevice_t device, unsigned int *width); - nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen); - nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsigned int *width); - nvmlReturn_t (*nvmlDeviceGetName)(nvmlDevice_t, char *, int); - nvmlReturn_t (*nvmlDeviceGetTemperature)(nvmlDevice_t, int, unsigned int *); - nvmlReturn_t (*nvmlDeviceGetFanSpeed)(nvmlDevice_t, unsigned int *); - nvmlReturn_t (*nvmlDeviceGetPerformanceState)(nvmlDevice_t, int *); /* enum */ - nvmlReturn_t (*nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int *); - nvmlReturn_t (*nvmlDeviceGetSerial)(nvmlDevice_t, char *serial, unsigned int len); - nvmlReturn_t (*nvmlDeviceGetUUID)(nvmlDevice_t, char *uuid, unsigned int len); - nvmlReturn_t (*nvmlDeviceGetVbiosVersion)(nvmlDevice_t, char *version, unsigned int len); - nvmlReturn_t (*nvmlSystemGetDriverVersion)(char *version, unsigned int len); + nvmlReturn_t(*nvmlInit)(void); + nvmlReturn_t(*nvmlDeviceGetCount)(int *); + nvmlReturn_t(*nvmlDeviceGetHandleByIndex)(int, nvmlDevice_t *); + nvmlReturn_t(*nvmlDeviceGetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *); + nvmlReturn_t(*nvmlDeviceSetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t); + nvmlReturn_t(*nvmlDeviceGetDefaultApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *); + nvmlReturn_t(*nvmlDeviceGetApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *); + nvmlReturn_t(*nvmlDeviceSetApplicationsClocks)(nvmlDevice_t, unsigned int, unsigned int); + nvmlReturn_t(*nvmlDeviceResetApplicationsClocks)(nvmlDevice_t); + nvmlReturn_t(*nvmlDeviceGetSupportedGraphicsClocks)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *arr); + nvmlReturn_t(*nvmlDeviceGetSupportedMemoryClocks)(nvmlDevice_t, unsigned int *count, unsigned int 
*clocksMHz); + nvmlReturn_t(*nvmlDeviceGetClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *); + nvmlReturn_t(*nvmlDeviceGetMaxClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *); + nvmlReturn_t(*nvmlDeviceGetPowerManagementDefaultLimit)(nvmlDevice_t, unsigned int *limit); + nvmlReturn_t(*nvmlDeviceGetPowerManagementLimit)(nvmlDevice_t, unsigned int *limit); + nvmlReturn_t(*nvmlDeviceGetPowerManagementLimitConstraints)(nvmlDevice_t, unsigned int *min, unsigned int *max); + nvmlReturn_t(*nvmlDeviceSetPowerManagementLimit)(nvmlDevice_t device, unsigned int limit); + nvmlReturn_t(*nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t *); + nvmlReturn_t(*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen); + nvmlReturn_t(*nvmlDeviceGetCurrPcieLinkWidth)(nvmlDevice_t device, unsigned int *width); + nvmlReturn_t(*nvmlDeviceGetMaxPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen); + nvmlReturn_t(*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsigned int *width); + nvmlReturn_t(*nvmlDeviceGetName)(nvmlDevice_t, char *, int); + nvmlReturn_t(*nvmlDeviceGetTemperature)(nvmlDevice_t, int, unsigned int *); + nvmlReturn_t(*nvmlDeviceGetFanSpeed)(nvmlDevice_t, unsigned int *); + nvmlReturn_t(*nvmlDeviceGetPerformanceState)(nvmlDevice_t, int *); /* enum */ + nvmlReturn_t(*nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int *); + nvmlReturn_t(*nvmlDeviceGetSerial)(nvmlDevice_t, char *serial, unsigned int len); + nvmlReturn_t(*nvmlDeviceGetUUID)(nvmlDevice_t, char *uuid, unsigned int len); + nvmlReturn_t(*nvmlDeviceGetVbiosVersion)(nvmlDevice_t, char *version, unsigned int len); + nvmlReturn_t(*nvmlSystemGetDriverVersion)(char *version, unsigned int len); char* (*nvmlErrorString)(nvmlReturn_t); - nvmlReturn_t (*nvmlShutdown)(void); + nvmlReturn_t(*nvmlShutdown)(void); // v331 - nvmlReturn_t (*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit); + nvmlReturn_t(*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int 
*limit); // v340 - //nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet); - //nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t); - //nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled); - //nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled); +#ifdef __linux__ + nvmlReturn_t(*nvmlDeviceClearCpuAffinity)(nvmlDevice_t); + nvmlReturn_t(*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet); + nvmlReturn_t(*nvmlDeviceSetCpuAffinity)(nvmlDevice_t); +#endif // v346 - nvmlReturn_t (*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value); -} nvml_handle; + nvmlReturn_t(*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value); + // v36x (API 8) + nvmlReturn_t(*nvmlDeviceGetClock)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz); +#ifdef __linux__ + nvmlReturn_t(*nvmlSystemGetTopologyGpuSet)(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray); + nvmlReturn_t(*nvmlDeviceGetTopologyNearestGpus)(nvmlDevice_t, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray); + nvmlReturn_t(*nvmlDeviceGetTopologyCommonAncestor)(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo); +#endif + nvmlReturn_t(*nvmlDeviceGetNvLinkState)(nvmlDevice_t, unsigned int link, nvmlEnableState_t *isActive); + nvmlReturn_t(*nvmlDeviceGetNvLinkVersion)(nvmlDevice_t, unsigned int link, unsigned int *version); + nvmlReturn_t(*nvmlDeviceGetNvLinkCapability)(nvmlDevice_t, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult); + nvmlReturn_t(*nvmlDeviceGetNvLinkRemotePciInfo)(nvmlDevice_t, unsigned int link, nvmlPciInfo_t *pci); + nvmlReturn_t(*nvmlDeviceGetNvLinkErrorCounter)(nvmlDevice_t, unsigned int link, 
nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue); + nvmlReturn_t(*nvmlDeviceResetNvLinkErrorCounters)(nvmlDevice_t, unsigned int link); + nvmlReturn_t(*nvmlDeviceSetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset); + nvmlReturn_t(*nvmlDeviceGetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control); + nvmlReturn_t(*nvmlDeviceGetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter); + nvmlReturn_t(*nvmlDeviceFreezeNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlEnableState_t freeze); + nvmlReturn_t(*nvmlDeviceResetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter); +} nvml_handle; nvml_handle * nvml_create(); int nvml_destroy(nvml_handle *nvmlh); -/* - * Query the number of GPUs seen by NVML - */ +// Debug informations +void nvml_print_device_info(int dev_id); + +// Query the number of GPUs seen by NVML int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount); int nvml_set_plimit(nvml_handle *nvmlh, int dev_id); @@ -165,20 +223,52 @@ unsigned int gpu_fanpercent(struct cgpu_info *gpu); unsigned int gpu_fanrpm(struct cgpu_info *gpu); float gpu_temp(struct cgpu_info *gpu); unsigned int gpu_power(struct cgpu_info *gpu); +unsigned int gpu_plimit(struct cgpu_info *gpu); int gpu_pstate(struct cgpu_info *gpu); int gpu_busid(struct cgpu_info *gpu); -unsigned int gpu_power(struct cgpu_info *gpu); -unsigned int gpu_plimit(struct cgpu_info *gpu); -/* pid/vid, sn and bios rev */ +// pid/vid, sn and bios rev int gpu_info(struct cgpu_info *gpu); -int gpu_vendor(uint8_t pci_bus_id, char *vendorname); +int gpu_vendor(uint8_t pci_bus_id, char *vendorname); /* nvapi functions */ #ifdef WIN32 int nvapi_init(); +int nvapi_init_settings(); + +// to debug 
nvapi.. +int nvapi_pstateinfo(unsigned int devNum); +uint8_t nvapi_get_plimit(unsigned int devNum); + +// nvapi devNum from dev_id (cuda GPU #N) +unsigned int nvapi_devnum(int dev_id); +int nvapi_devid(unsigned int devNum); + +void nvapi_toggle_clocks(int thr_id, bool enable); + +// cuda Replacement for 6.5 compat int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total); #endif #endif /* USE_WRAPNVML */ + +void gpu_led_on(int dev_id); +void gpu_led_percent(int dev_id, int percent); +void gpu_led_off(int dev_id); + +#define LED_MODE_OFF 0 +#define LED_MODE_SHARES 1 +#define LED_MODE_MINING 2 + +/* ------ nvidia-settings stuff for linux -------------------- */ + +int nvs_init(); +int nvs_set_clocks(int dev_id); +void nvs_reset_clocks(int dev_id); + +// nvidia-settings (X) devNum from dev_id (cuda GPU #N) +int8_t nvs_devnum(int dev_id); +int nvs_devid(int8_t devNum); + +extern bool need_nvsettings; \ No newline at end of file diff --git a/nvsettings.cpp b/nvsettings.cpp new file mode 100644 index 00000000..5ea32338 --- /dev/null +++ b/nvsettings.cpp @@ -0,0 +1,251 @@ +/** + * nvidia-settings command line interface for linux - tpruvot 2017 + * + * Notes: need X setup and running, with an opened X session. 
+ * init speed could be improved, running multiple threads + */ + +#include +#include +#include +#include +#include +#include +#include // pid_t + +#include "miner.h" +#include "nvml.h" +#include "cuda_runtime.h" + +#ifdef __linux__ + +#define NVS_PATH "/usr/bin/nvidia-settings" + +static int8_t nvs_dev_map[MAX_GPUS] = { 0 }; +static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 }; +static int32_t nvs_clocks_set[MAX_GPUS] = { 0 }; + +extern int32_t device_mem_offsets[MAX_GPUS]; + +#if 0 /* complicated exec way and not better in fine */ +int nvs_query_fork_int(int nvs_id, const char* field) +{ + pid_t pid; + int pipes[2] = { 0 }; + if (pipe(pipes) < 0) + return -1; + + if ((pid = fork()) == -1) { + close(pipes[0]); + close(pipes[1]); + return -1; + } else if (pid == 0) { + char gpu_field[128] = { 0 }; + sprintf(gpu_field, "[gpu:%d]/%s", nvs_id, field); + + dup2(pipes[1], STDOUT_FILENO); + close(pipes[0]); + //close(pipes[1]); + + if (-1 == execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", NULL)) { + exit(-1); + } + } else { + int intval = -1; + FILE *p = fdopen(pipes[0], "r"); + close(pipes[1]); + if (!p) { + applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]); + return -1; + } + int rc = fscanf(p, "%d", &intval); // BUS 0000:2a:00.0 is read 42 + if (rc > 0) { + //applog(LOG_BLUE, "%s res=%d", field, intval); + } + fclose(p); + close(pipes[0]); + return intval; + } + return -1; +} +#endif + +int nvs_query_int(int nvs_id, const char* field, int showerr) +{ + FILE *fp; + char command[256] = { 0 }; + sprintf(command, "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field); + fp = popen(command, "r"); + if (fp) { + int intval = -1; + if (!showerr) { + int b = fscanf(fp, "%d", &intval); + if (!b) { + pclose(fp); + return -1; + } + } else { + char msg[512] = { 0 }; + char buf[64] = { 0 }; + ssize_t bytes, len=0, maxlen=sizeof(msg)-1; + while ((bytes=fscanf(fp, "%s", buf)) > 0) { + len += snprintf(&msg[len], maxlen-len, "%s ", buf); + if (len >= maxlen) break; + 
} + if (strstr(msg, "ERROR")) { + char *xtra = strstr(msg, "; please run"); + if (xtra) *xtra = '\0'; // strip noise + applog(LOG_INFO, "%s", msg); + intval = -1; + } else { + sscanf(msg, "%d", &intval); + } + } + pclose(fp); + return intval; + } + return -1; +} + +int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen) +{ + FILE *fp; + char command[256] = { 0 }; + *output = '\0'; + sprintf(command, "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field); + fp = popen(command, "r"); + if (fp) { + char buf[256] = { 0 }; + ssize_t len=0; + ssize_t bytes=0; + while ((bytes=fscanf(fp, "%s", buf)) > 0) { + //applog(LOG_BLUE, "%d %s %d", nvs_id, buf, (int) bytes); + len += snprintf(&output[len], maxlen-len, "%s ", buf); + if (len >= maxlen) break; + } + pclose(fp); + if (strstr(output, "ERROR")) { + char *xtra = strstr(output, "; please run"); + if (xtra) *xtra = '\0'; // strip noise + applog(LOG_INFO, "%s", output); + *output='\0'; + } + return (int) len; + } + return -1; +} + +int nvs_set_int(int nvs_id, const char* field, int value) +{ + FILE *fp; + char command[256] = { 0 }; + int res = -1; + snprintf(command, 256, "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value); + fp = popen(command, "r"); + if (fp) { + char msg[512] = { 0 }; + char buf[64] = { 0 }; + ssize_t bytes, len=0, maxlen=sizeof(msg)-1; + while ((bytes=fscanf(fp, "%s", buf)) > 0) { + len += snprintf(&msg[len], maxlen-len, "%s ", buf); + if (len >= maxlen) break; + } + if (strstr(msg, "ERROR")) { + char *xtra = strstr(msg, "; please run"); + if (xtra) *xtra = '\0'; // strip noise + applog(LOG_INFO, "%s", msg); + } else + res = 0; + pclose(fp); + } + return res; +} + +int8_t nvs_devnum(int dev_id) +{ + return nvs_dev_map[dev_id]; +} + +int nvs_devid(int8_t nvs_id) +{ + for (int i=0; i < opt_n_threads; i++) { + int dev_id = device_map[i % MAX_GPUS]; + if (nvs_dev_map[dev_id] == nvs_id) + return dev_id; + } + return 0; +} + +int nvs_init() +{ + struct stat info; + struct 
timeval tv_start, tv_end, diff; + int x_devices = 0; + int n_threads = opt_n_threads; + if (stat(NVS_PATH, &info)) + return -ENOENT; + + gettimeofday(&tv_start, NULL); + + for (int d = 0; d < MAX_GPUS; d++) { + // this part can be "slow" (100-200ms per device) + int res = nvs_query_int(d, "PCIBus", 1); + if (res < 0) break; + nvs_bus_ids[d] = 0xFFu & res; + x_devices++; + } + + if (opt_debug) { + gettimeofday(&tv_end, NULL); + timeval_subtract(&diff, &tv_end, &tv_start); + applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms", + (1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec)); + } + + if (!x_devices) + return -ENODEV; + if (!n_threads) n_threads = cuda_num_devices(); + for (int i = 0; i < n_threads; i++) { + int dev_id = device_map[i % MAX_GPUS]; + cudaDeviceProp props; + if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) { + for (int8_t d = 0; d < x_devices; d++) { + if (nvs_bus_ids[d] == (uint8_t) props.pciBusID) { + gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u", + (int) d, (uint) nvs_bus_ids[d]); + nvs_dev_map[dev_id] = d; + /* char buf[1024] = { 0 }; + nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1); + gpulog(LOG_DEBUG, d, "%s", buf); */ + break; + } + } + } + } + return 0; +} + +int nvs_set_clocks(int dev_id) +{ + int res; + int8_t d = nvs_devnum(dev_id); + if (d < 0) return -ENODEV; + if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0; + res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", device_mem_offsets[dev_id]*2); + if (res) nvs_clocks_set[d] = device_mem_offsets[dev_id]*2; + return res; +} + +void nvs_reset_clocks(int dev_id) +{ + int8_t d = nvs_devnum(dev_id); + if (d < 0 || !nvs_clocks_set[d]) return; + nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0); + nvs_clocks_set[d] = 0; +} + +#else +int nvs_init() { return -ENOSYS; } +int nvs_set_clocks(int dev_id) { return -ENOSYS; } +void nvs_reset_clocks(int dev_id) { } +#endif diff --git a/util.cpp b/util.cpp 
index 37caf690..5b0d1a1a 100644 --- a/util.cpp +++ b/util.cpp @@ -173,6 +173,40 @@ void applog(int prio, const char *fmt, ...) va_end(ap); } +extern int gpu_threads; +// Use different prefix if multiple cpu threads per gpu +// Also, auto hide LOG_DEBUG if --debug (-D) is not used +void gpulog(int prio, int thr_id, const char *fmt, ...) +{ + char _ALIGN(128) pfmt[128]; + char _ALIGN(128) line[256]; + int len, dev_id = device_map[thr_id % MAX_GPUS]; + va_list ap; + + if(prio == LOG_DEBUG && !opt_debug) + return; + + if(gpu_threads > 1) + len = snprintf(pfmt, 128, "GPU T%d: %s", thr_id, fmt); + else + len = snprintf(pfmt, 128, "GPU #%d: %s", dev_id, fmt); + pfmt[sizeof(pfmt) - 1] = '\0'; + + va_start(ap, fmt); + + if(len && vsnprintf(line, sizeof(line), pfmt, ap)) + { + line[sizeof(line) - 1] = '\0'; + applog(prio, "%s", line); + } + else + { + fprintf(stderr, "%s OOM!\n", __func__); + } + + va_end(ap); +} + void format_hashrate(double hashrate, char *output) { char prefix = '\0'; From ef53486dd8a1804407c6e92f1cc0c2ead3426699 Mon Sep 17 00:00:00 2001 From: klaust Date: Sun, 14 Jan 2018 21:20:05 +0100 Subject: [PATCH 2/2] try to fix NVML / NVAPI --- ccminer.cpp | 106 ++- nvml.cpp | 2565 ++++++++++++++++++++++++--------------------------- 2 files changed, 1255 insertions(+), 1416 deletions(-) diff --git a/ccminer.cpp b/ccminer.cpp index 39111d28..5055444d 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -176,6 +176,7 @@ uint32_t device_plimit[MAX_GPUS] = {0}; int8_t device_pstate[MAX_GPUS]; int32_t device_led[MAX_GPUS] = {-1, -1}; int opt_led_mode = 0; +int opt_cudaschedule = -1; uint8_t device_tlimit[MAX_GPUS] = {0}; char *rpc_user = NULL; static char *rpc_url = nullptr; @@ -305,8 +306,8 @@ Options:\n\ "\ --mem-clock=N Set the gpu memory max clock (346.72+ driver)\n\ --gpu-clock=N Set the gpu engine max clock (346.72+ driver)\n\ - --pstate=N Set the gpu power state (352.21+ driver)\n\ - --plimit=N Set the gpu power limit(352.21 + driver)\n" + --pstate=N (not for 10xx 
cards) Set the gpu power state (352.21+ driver)\n\ + --plimit=N Set the gpu power limit (352.21+ driver)\n" #endif ""; @@ -477,6 +478,26 @@ void proper_exit(int reason) #ifdef WIN32 timeEndPeriod(1); #endif +#ifdef USE_WRAPNVML + if(hnvml) + { + for(int n = 0; n < opt_n_threads; n++) + { + nvml_reset_clocks(hnvml, device_map[n]); + } + nvml_destroy(hnvml); + } + if(need_memclockrst) + { +# ifdef WIN32 + for(int n = 0; n < opt_n_threads; n++) + { + nvapi_toggle_clocks(n, false); + } +# endif + } +#endif + sleep(1); exit(reason); } @@ -2555,7 +2576,11 @@ static void parse_arg(int key, char *arg) while(pch != NULL && n < MAX_GPUS) { int dev_id = device_map[n++]; - device_mem_clocks[dev_id] = atoi(pch); + if(*pch == '+' || *pch == '-') + device_mem_offsets[dev_id] = atoi(pch); + else + device_mem_clocks[dev_id] = atoi(pch); + need_nvsettings = true; pch = strtok(NULL, ","); } } @@ -2804,40 +2829,6 @@ int main(int argc, char *argv[]) cuda_devicenames(); -#ifdef USE_WRAPNVML -#if defined(__linux__) || defined(_WIN64) - /* nvml is currently not the best choice on Windows (only in x64) */ - hnvml = nvml_create(); - if(hnvml) - { - bool gpu_reinit = false;// (opt_cudaschedule >= 0); - cuda_devicenames(); // refresh gpu vendor name - applog(LOG_INFO, "NVML GPU monitoring enabled."); - } -#endif -#ifdef WIN32 - if(!hnvml && nvapi_init() == 0) - { - applog(LOG_INFO, "NVAPI GPU monitoring enabled."); - if(!hnvml) - { - cuda_devicenames(); // refresh gpu vendor name - } - nvapi_init_settings(); - } -#endif - else if(!hnvml) - applog(LOG_INFO, "GPU monitoring is not available."); - // force reinit to set default device flags - if(!hnvml) - { - for(int n = 0; n < active_gpus; n++) - { - cuda_reset_device(n, NULL); - } - } -#endif - if(opt_protocol) { curl_version_info_data *info; @@ -3037,11 +3028,25 @@ int main(int argc, char *argv[]) tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); } + +#ifdef __linux__ + if(need_nvsettings) + { + if(nvs_init() < 0) + need_nvsettings = 
false; + } +#endif + #ifdef USE_WRAPNVML #if defined(__linux__) || defined(_WIN64) /* nvml is currently not the best choice on Windows (only in x64) */ - if (hnvml) { - bool gpu_reinit = false;// (opt_cudaschedule >= 0); + hnvml = nvml_create(); + if(hnvml) + { + bool gpu_reinit = (opt_cudaschedule >= 0); //false + cuda_devicenames(); // refresh gpu vendor name + if(!opt_quiet) + applog(LOG_INFO, "NVML GPU monitoring enabled."); for(int n = 0; n < active_gpus; n++) { if(nvml_set_pstate(hnvml, device_map[n]) == 1) @@ -3057,6 +3062,29 @@ int main(int argc, char *argv[]) } } #endif +#ifdef WIN32 + if(nvapi_init() == 0) + { + if(!opt_quiet) + applog(LOG_INFO, "NVAPI GPU monitoring enabled."); + if(!hnvml) + { + cuda_devicenames(); // refresh gpu vendor name + } + nvapi_init_settings(); + } +#endif + else if(!hnvml && !opt_quiet) + applog(LOG_INFO, "GPU monitoring is not available."); + + // force reinit to set default device flags + if(opt_cudaschedule >= 0 && !hnvml) + { + for(int n = 0; n < active_gpus; n++) + { + cuda_reset_device(n, NULL); + } + } #endif if(opt_api_listen) diff --git a/nvml.cpp b/nvml.cpp index 2bf19d8e..7b7d3227 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -1,19 +1,19 @@ -/* -* A trivial little dlopen()-based wrapper library for the -* NVIDIA NVML library, to allow runtime discovery of NVML on an -* arbitrary system. This is all very hackish and simple-minded, but -* it serves my immediate needs in the short term until NVIDIA provides -* a static NVML wrapper library themselves, hopefully in -* CUDA 6.5 or maybe sometime shortly after. -* -* This trivial code is made available under the "new" 3-clause BSD license, -* and/or any of the GPL licenses you prefer. -* Feel free to use the code and modify as you see fit. -* -* John E. Stone - john.stone@gmail.com -* Tanguy Pruvot - tpruvot@github -* -*/ +/* + * A trivial little dlopen()-based wrapper library for the + * NVIDIA NVML library, to allow runtime discovery of NVML on an + * arbitrary system. 
This is all very hackish and simple-minded, but + * it serves my immediate needs in the short term until NVIDIA provides + * a static NVML wrapper library themselves, hopefully in + * CUDA 6.5 or maybe sometime shortly after. + * + * This trivial code is made available under the "new" 3-clause BSD license, + * and/or any of the GPL licenses you prefer. + * Feel free to use the code and modify as you see fit. + * + * John E. Stone - john.stone@gmail.com + * Tanguy Pruvot - tpruvot@github + * + */ #include #include @@ -30,7 +30,7 @@ extern nvml_handle *hnvml; extern char driver_version[32]; -static uint32_t device_bus_ids[MAX_GPUS] = {0}; +static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; extern uint32_t device_gpu_clocks[MAX_GPUS]; extern uint32_t device_mem_clocks[MAX_GPUS]; @@ -38,68 +38,60 @@ extern int32_t device_mem_offsets[MAX_GPUS]; extern uint8_t device_tlimit[MAX_GPUS]; extern int8_t device_pstate[MAX_GPUS]; extern int32_t device_led[MAX_GPUS]; -int32_t device_led_state[MAX_GPUS] = {0}; +int32_t device_led_state[MAX_GPUS] = { 0 }; static THREAD bool has_rgb_ok = false; -uint32_t clock_prev[MAX_GPUS] = {0}; -uint32_t clock_prev_mem[MAX_GPUS] = {0}; -uint32_t limit_prev[MAX_GPUS] = {0}; +uint32_t clock_prev[MAX_GPUS] = { 0 }; +uint32_t clock_prev_mem[MAX_GPUS] = { 0 }; +uint32_t limit_prev[MAX_GPUS] = { 0 }; static bool nvml_plimit_set = false; extern bool need_memclockrst; /* -* Wrappers to emulate dlopen() on other systems like Windows -*/ + * Wrappers to emulate dlopen() on other systems like Windows + */ #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) -#include -static void *wrap_dlopen(const char *filename) -{ - HMODULE h = LoadLibrary(filename); - if(!h && opt_debug) - { - applog(LOG_DEBUG, "dlopen(%d): failed to load %s", - GetLastError(), filename); + #include + static void *wrap_dlopen(const char *filename) { + HMODULE h = LoadLibrary(filename); + if (!h && opt_debug) { + applog(LOG_DEBUG, "dlopen(%d): failed to load %s", + 
GetLastError(), filename); + } + return (void*)h; + } + static void *wrap_dlsym(void *h, const char *sym) { + return (void *)GetProcAddress((HINSTANCE)h, sym); + } + static int wrap_dlclose(void *h) { + /* FreeLibrary returns nonzero on success */ + return (!FreeLibrary((HINSTANCE)h)); } - return (void*)h; -} -static void *wrap_dlsym(void *h, const char *sym) -{ - return (void *)GetProcAddress((HINSTANCE)h, sym); -} -static int wrap_dlclose(void *h) -{ - /* FreeLibrary returns nonzero on success */ - return (!FreeLibrary((HINSTANCE)h)); -} #else -/* assume we can use dlopen itself... */ -#include -#include -static void *wrap_dlopen(const char *filename) -{ - void *h = dlopen(filename, RTLD_NOW); - if(h == NULL && opt_debug) - { - applog(LOG_DEBUG, "dlopen(%d): failed to load %s", - errno, filename); + /* assume we can use dlopen itself... */ + #include + #include + static void *wrap_dlopen(const char *filename) { + void *h = dlopen(filename, RTLD_NOW); + if (h == NULL && opt_debug) { + applog(LOG_DEBUG, "dlopen(%d): failed to load %s", + errno, filename); + } + return (void*)h; } - return (void*)h; -} -static void *wrap_dlsym(void *h, const char *sym) -{ - return dlsym(h, sym); -} -static int wrap_dlclose(void *h) -{ - return dlclose(h); -} + static void *wrap_dlsym(void *h, const char *sym) { + return dlsym(h, sym); + } + static int wrap_dlclose(void *h) { + return dlclose(h); + } #endif nvml_handle * nvml_create() { - int i = 0; + int i=0; nvml_handle *nvmlh = NULL; #ifdef WIN32 @@ -118,116 +110,115 @@ nvml_handle * nvml_create() #endif void *nvml_dll = wrap_dlopen(tmp); - if(nvml_dll == NULL) - { + if (nvml_dll == NULL) { #ifdef WIN32 nvml_dll = wrap_dlopen("nvml.dll"); - if(nvml_dll == NULL) + if (nvml_dll == NULL) #endif - return NULL; + return NULL; } - nvmlh = (nvml_handle *)calloc(1, sizeof(nvml_handle)); + nvmlh = (nvml_handle *) calloc(1, sizeof(nvml_handle)); nvmlh->nvml_dll = nvml_dll; - nvmlh->nvmlInit = (nvmlReturn_t(*)(void)) 
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); - if(!nvmlh->nvmlInit) - nvmlh->nvmlInit = (nvmlReturn_t(*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); - nvmlh->nvmlDeviceGetCount = (nvmlReturn_t(*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); - if(!nvmlh->nvmlDeviceGetCount) - nvmlh->nvmlDeviceGetCount = (nvmlReturn_t(*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount"); - nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t(*)(int, nvmlDevice_t *)) + nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); + if (!nvmlh->nvmlInit) + nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); + nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); + if (!nvmlh->nvmlDeviceGetCount) + nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount"); + nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); - nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t(*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *)) + nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAPIRestriction"); - nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t(*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t)) + nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAPIRestriction"); - nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) + nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetDefaultApplicationsClock"); - nvmlh->nvmlDeviceGetApplicationsClock = 
(nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks)) + nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetApplicationsClock"); - nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int mem, unsigned int gpu)) + nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int mem, unsigned int gpu)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetApplicationsClocks"); - nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t(*)(nvmlDevice_t)) + nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceResetApplicationsClocks"); - nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t(*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *)) + nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t (*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedGraphicsClocks"); - nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz)) + nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedMemoryClocks"); - nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) + nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); - nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) + nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxClockInfo"); - nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlPciInfo_t 
*)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2"); - if(!nvmlh->nvmlDeviceGetPciInfo) - nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t(*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); - nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *gen)) + nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2"); + if (!nvmlh->nvmlDeviceGetPciInfo) + nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); + nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkGeneration"); - nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *width)) + nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkWidth"); - nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *gen)) + nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkGeneration"); - nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *width)) + nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkWidth"); - nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *)) + nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); - nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *limit)) + nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int 
*limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementDefaultLimit"); - nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *limit)) + nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimit"); - nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *min, unsigned int *max)) + nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *min, unsigned int *max)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimitConstraints"); - nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int limit)) + nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetPowerManagementLimit"); - nvmlh->nvmlDeviceGetName = (nvmlReturn_t(*)(nvmlDevice_t, char *, int)) + nvmlh->nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); - nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t(*)(nvmlDevice_t, int, unsigned int *)) + nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t (*)(nvmlDevice_t, int, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); - nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *)) + nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed"); - nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t(*)(nvmlDevice_t, int *)) + nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t (*)(nvmlDevice_t, int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* or nvmlDeviceGetPowerState */ - nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t(*)(nvmlDevice_t, char *, unsigned int)) + nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t (*)(nvmlDevice_t, char *, 
unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSerial"); - nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t(*)(nvmlDevice_t, char *, unsigned int)) + nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetUUID"); - nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t(*)(nvmlDevice_t, char *, unsigned int)) + nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetVbiosVersion"); - nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t(*)(char *, unsigned int)) + nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t (*)(char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlSystemGetDriverVersion"); nvmlh->nvmlErrorString = (char* (*)(nvmlReturn_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); - nvmlh->nvmlShutdown = (nvmlReturn_t(*)()) + nvmlh->nvmlShutdown = (nvmlReturn_t (*)()) wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); // v331 - nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *limit)) + nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); // v340 #ifdef __linux__ - nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t(*)(nvmlDevice_t)) + nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity"); - nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t(*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) + nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity"); - nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t(*)(nvmlDevice_t)) + nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity"); #endif // v346 - nvmlh->nvmlDeviceGetPcieThroughput = 
(nvmlReturn_t(*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) + nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput"); // v36x (API 8 / Pascal) - nvmlh->nvmlDeviceGetClock = (nvmlReturn_t(*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz)) + nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock"); - if(nvmlh->nvmlInit == NULL || - nvmlh->nvmlShutdown == NULL || - nvmlh->nvmlErrorString == NULL || - nvmlh->nvmlDeviceGetCount == NULL || - nvmlh->nvmlDeviceGetHandleByIndex == NULL || - nvmlh->nvmlDeviceGetPciInfo == NULL || - nvmlh->nvmlDeviceGetName == NULL) + if (nvmlh->nvmlInit == NULL || + nvmlh->nvmlShutdown == NULL || + nvmlh->nvmlErrorString == NULL || + nvmlh->nvmlDeviceGetCount == NULL || + nvmlh->nvmlDeviceGetHandleByIndex == NULL || + nvmlh->nvmlDeviceGetPciInfo == NULL || + nvmlh->nvmlDeviceGetName == NULL) { - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "Failed to obtain required NVML function pointers"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); @@ -235,86 +226,76 @@ nvml_handle * nvml_create() } nvmlh->nvmlInit(); - if(nvmlh->nvmlSystemGetDriverVersion) + if (nvmlh->nvmlSystemGetDriverVersion) nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version)); nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); /* Query CUDA device count, in case it doesn't agree with NVML, since */ /* CUDA will only report GPUs with compute capability greater than 1.0 */ - if(cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) - { - if(opt_debug) + if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { + if (opt_debug) applog(LOG_DEBUG, "Failed to query CUDA device count!"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } - nvmlh->devs 
= (nvmlDevice_t *)calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t)); - nvmlh->nvml_pci_domain_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_bus_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_device_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_vendor_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_pci_subsys_id = (unsigned int*)calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); - nvmlh->nvml_cuda_device_id = (int*)calloc(nvmlh->nvml_gpucount, sizeof(int)); - nvmlh->cuda_nvml_device_id = (int*)calloc(nvmlh->cuda_gpucount, sizeof(int)); - nvmlh->app_clocks = (nvmlEnableState_t*)calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); + nvmlh->devs = (nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t)); + nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_vendor_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); + nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); + nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); /* Obtain GPU device handles we're going to need repeatedly... 
*/ - for(i = 0; invml_gpucount; i++) - { + for (i=0; invml_gpucount; i++) { nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); } /* Query PCI info for each NVML device, and build table for mapping of */ /* CUDA device IDs to NVML device IDs and vice versa */ - for(i = 0; invml_gpucount; i++) - { + for (i=0; invml_gpucount; i++) { nvmlPciInfo_t pciinfo; nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; - nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; + nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; nvmlh->nvml_pci_device_id[i] = pciinfo.device; nvmlh->nvml_pci_vendor_id[i] = pciinfo.pci_device_id; nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id; nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN; - if(nvmlh->nvmlDeviceSetAPIRestriction) - { + if (nvmlh->nvmlDeviceSetAPIRestriction) { nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, - NVML_FEATURE_ENABLED); + NVML_FEATURE_ENABLED); /* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */ } - if(nvmlh->nvmlDeviceGetAPIRestriction) - { + if (nvmlh->nvmlDeviceGetAPIRestriction) { nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, - &nvmlh->app_clocks[i]); + &nvmlh->app_clocks[i]); } } /* build mapping of NVML device IDs to CUDA IDs */ - for(i = 0; invml_gpucount; i++) - { + for (i=0; invml_gpucount; i++) { nvmlh->nvml_cuda_device_id[i] = -1; } - for(i = 0; icuda_gpucount; i++) - { + for (i=0; icuda_gpucount; i++) { cudaDeviceProp props; nvmlh->cuda_nvml_device_id[i] = -1; - if(cudaGetDeviceProperties(&props, i) == cudaSuccess) - { + if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { device_bus_ids[i] = props.pciBusID; - for(int j = 0; j < nvmlh->nvml_gpucount; j++) - { - if((nvmlh->nvml_pci_domain_id[j] == (uint32_t)props.pciDomainID) && - (nvmlh->nvml_pci_bus_id[j] == (uint32_t)props.pciBusID) && - (nvmlh->nvml_pci_device_id[j] == (uint32_t)props.pciDeviceID)) - 
{ - if(opt_debug) + for (int j = 0; j < nvmlh->nvml_gpucount; j++) { + if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && + (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && + (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { + if (opt_debug) applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u", - i, j, (uint32_t)props.pciBusID); + i, j, (uint32_t) props.pciBusID); nvmlh->nvml_cuda_device_id[j] = i; nvmlh->cuda_nvml_device_id[i] = j; } @@ -333,51 +314,47 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) int n = nvmlh->cuda_nvml_device_id[dev_id]; //if (need_nvsettings) /* prefer later than init time */ // nvs_set_clocks(dev_id); - if(n < 0 || n >= nvmlh->nvml_gpucount) + if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if(!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id]) + if (!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id]) return 0; // nothing to do - if(nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) - { + if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", dev_id); return -EPERM; } uint32_t mem_prev = clock_prev_mem[dev_id]; - if(!mem_prev) + if (!mem_prev) nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev); uint32_t gpu_prev = clock_prev[dev_id]; - if(!gpu_prev) + if (!gpu_prev) nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev); nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); - if(rc != NVML_SUCCESS) - { + if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); return -EINVAL; } - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "GPU #%d: default application clocks are %u/%u", dev_id, mem_clk, gpu_clk); // get application config values - 
if(device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; - if(device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; + if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; + if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ - uint32_t nclocks = 0, mem_clocks[32] = {0}; + uint32_t nclocks = 0, mem_clocks[32] = { 0 }; nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); nclocks = min(nclocks, 32); - if(nclocks) + if (nclocks) nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); - for(uint8_t u = 0; u < nclocks; u++) - { + for (uint8_t u=0; u < nclocks; u++) { // ordered by pstate (so highest is first memory clock - P0) - if(mem_clocks[u] <= mem_clk) - { + if (mem_clocks[u] <= mem_clk) { mem_clk = mem_clocks[u]; break; } @@ -386,17 +363,14 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) uint32_t* gpu_clocks = NULL; nclocks = 0; nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); - if(nclocks) - { - if(opt_debug) + if (nclocks) { + if (opt_debug) applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk); - gpu_clocks = (uint32_t*)calloc(1, sizeof(uint32_t) * nclocks + 4); + gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); - for(uint8_t u = 0; u < nclocks; u++) - { + for (uint8_t u=0; u < nclocks; u++) { // ordered desc, so get first - if(gpu_clocks[u] <= gpu_clk) - { + if (gpu_clocks[u] <= gpu_clk) { gpu_clk = gpu_clocks[u]; break; } @@ -405,10 +379,9 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); - if(rc == NVML_SUCCESS) + if (rc == NVML_SUCCESS) applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", 
dev_id, mem_clk, gpu_clk); - else - { + else { applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } @@ -426,34 +399,28 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; - if(need_nvsettings) + if (need_nvsettings) nvs_reset_clocks(dev_id); - if(n < 0 || n >= nvmlh->nvml_gpucount) + if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if(clock_prev[dev_id]) - { + if (clock_prev[dev_id]) { rc = nvmlh->nvmlDeviceResetApplicationsClocks(nvmlh->devs[n]); - if(rc != NVML_SUCCESS) - { + if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to reset application clocks", dev_id); } clock_prev[dev_id] = 0; ret = 1; } - if(limit_prev[dev_id]) - { + if (limit_prev[dev_id]) { uint32_t plimit = limit_prev[dev_id]; - if(nvmlh->nvmlDeviceGetPowerManagementDefaultLimit && !plimit) - { + if (nvmlh->nvmlDeviceGetPowerManagementDefaultLimit && !plimit) { rc = nvmlh->nvmlDeviceGetPowerManagementDefaultLimit(nvmlh->devs[n], &plimit); - } - else if(plimit) - { + } else if (plimit) { rc = NVML_SUCCESS; } - if(rc == NVML_SUCCESS) + if (rc == NVML_SUCCESS) nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); ret = 1; } @@ -461,54 +428,49 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) } /** -* Set power state of a device (9xx) -* Code is similar as clocks one, which allow the change of the pstate -*/ + * Set power state of a device (9xx) + * Code is similar as clocks one, which allow the change of the pstate + */ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) { nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; - if(n < 0 || n >= nvmlh->nvml_gpucount) + if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if(device_pstate[dev_id] < 0) + if (device_pstate[dev_id] < 0) return 0; - if(nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) - { + if 
(nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id); return -EPERM; } nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); - if(rc != NVML_SUCCESS) - { + if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); return -EINVAL; } // get application config values - if(device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; - if(device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; + if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; + if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ - uint32_t nclocks = 0, mem_clocks[32] = {0}; + uint32_t nclocks = 0, mem_clocks[32] = { 0 }; int8_t wanted_pstate = device_pstate[dev_id]; nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); nclocks = min(nclocks, 32); - if(nclocks) + if (nclocks) nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); - if((uint32_t)wanted_pstate + 1 > nclocks) - { + if ((uint32_t) wanted_pstate+1 > nclocks) { applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks); } - for(uint8_t u = 0; u < nclocks; u++) - { + for (uint8_t u=0; u < nclocks; u++) { // ordered by pstate (so highest P0 first) - if(u == wanted_pstate) - { + if (u == wanted_pstate) { mem_clk = mem_clocks[u]; break; } @@ -517,12 +479,10 @@ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) uint32_t* gpu_clocks = NULL; nclocks = 0; nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); - if(nclocks) - { - gpu_clocks = (uint32_t*)calloc(1, sizeof(uint32_t) * nclocks + 4); + if (nclocks) { + gpu_clocks = (uint32_t*) calloc(1, 
sizeof(uint32_t) * nclocks + 4); rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); - if(rc == NVML_SUCCESS) - { + if (rc == NVML_SUCCESS) { // ordered desc, get the max app clock (do not limit) gpu_clk = gpu_clocks[0]; } @@ -530,15 +490,14 @@ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int)wanted_pstate, - mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); + if (rc != NVML_SUCCESS) { + applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate, + mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } - if(!opt_quiet) - applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int)wanted_pstate, mem_clk, gpu_clk); + if (!opt_quiet) + applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk); clock_prev[dev_id] = 1; return 1; @@ -549,48 +508,43 @@ int nvml_set_plimit(nvml_handle *nvmlh, int dev_id) nvmlReturn_t rc = NVML_ERROR_UNKNOWN; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; - if(n < 0 || n >= nvmlh->nvml_gpucount) + if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; - if(!device_plimit[dev_id]) + if (!device_plimit[dev_id]) return 0; // nothing to do - if(!nvmlh->nvmlDeviceSetPowerManagementLimit) + if (!nvmlh->nvmlDeviceSetPowerManagementLimit) return -ENOSYS; uint32_t plimit = device_plimit[dev_id] * 1000; uint32_t pmin = 1000, pmax = 0, prev_limit = 0; - if(nvmlh->nvmlDeviceGetPowerManagementLimitConstraints) + if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints) rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax); - if(rc != NVML_SUCCESS) - { - if(!nvmlh->nvmlDeviceGetPowerManagementLimit) + if (rc != NVML_SUCCESS) { + if (!nvmlh->nvmlDeviceGetPowerManagementLimit) return 
-ENOSYS; } nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit); - if(!pmax) pmax = prev_limit; + if (!pmax) pmax = prev_limit; plimit = min(plimit, pmax); plimit = max(plimit, pmin); rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); - if(rc != NVML_SUCCESS) - { + if (rc != NVML_SUCCESS) { #ifndef WIN32 applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc)); #endif return -1; - } - else - { + } else { device_plimit[dev_id] = plimit / 1000; nvml_plimit_set = true; } - if(!opt_quiet) - { + if (!opt_quiet) { applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)", - dev_id, plimit / 1000U, pmin / 1000U, pmax / 1000U); + dev_id, plimit/1000U, pmin/1000U, pmax/1000U); } limit_prev[dev_id] = prev_limit; @@ -601,11 +555,10 @@ uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id) { uint32_t plimit = 0; int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1; - if(n < 0 || n >= nvmlh->nvml_gpucount) + if (n < 0 || n >= nvmlh->nvml_gpucount) return 0; - if(nvmlh->nvmlDeviceGetPowerManagementLimit) - { + if (nvmlh->nvmlDeviceGetPowerManagementLimit) { nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit); } return plimit; @@ -615,10 +568,10 @@ uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id) #define LSTDEV_PFX " " void nvml_print_device_info(int dev_id) { - if(!hnvml) return; + if (!hnvml) return; int n = hnvml->cuda_nvml_device_id[dev_id]; - if(n < 0 || n >= hnvml->nvml_gpucount) + if (n < 0 || n >= hnvml->nvml_gpucount) return; nvmlReturn_t rc; @@ -630,30 +583,26 @@ void nvml_print_device_info(int dev_id) int spid = hnvml->nvml_pci_subsys_id[n] >> 16; fprintf(stderr, LSTDEV_PFX "ID %04x:%04x/%04x:%04x BUS %04x:%02x:%02x.0\n", gvid, gpid, svid, spid, - (int)hnvml->nvml_pci_domain_id[n], (int)hnvml->nvml_pci_bus_id[n], (int)hnvml->nvml_pci_device_id[n]); + (int) hnvml->nvml_pci_domain_id[n], (int) hnvml->nvml_pci_bus_id[n], (int) hnvml->nvml_pci_device_id[n]); - 
if(hnvml->nvmlDeviceGetClock) - { + if (hnvml->nvmlDeviceGetClock) { uint32_t gpu_clk = 0, mem_clk = 0; // fprintf(stderr, "------- Clocks -------\n"); hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk); rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk); - if(rc == NVML_SUCCESS) - { + if (rc == NVML_SUCCESS) { fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); } hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk); rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk); - if(rc == NVML_SUCCESS) - { + if (rc == NVML_SUCCESS) { fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); } hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk); rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk); - if(rc == NVML_SUCCESS) - { + if (rc == NVML_SUCCESS) { fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); } } @@ -675,13 +624,13 @@ int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount) int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if(!nvmlh->nvmlDeviceGetName) + if (!nvmlh->nvmlDeviceGetName) return -ENOSYS; - if(nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != NVML_SUCCESS) + if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != NVML_SUCCESS) return -1; return 0; @@ -692,15 +641,14 @@ int nvml_get_tempC(nvml_handle *nvmlh, int cudaindex, unsigned int *tempC) { nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 
0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if(!nvmlh->nvmlDeviceGetTemperature) + if (!nvmlh->nvmlDeviceGetTemperature) return -ENOSYS; rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC); - if(rc != NVML_SUCCESS) - { + if (rc != NVML_SUCCESS) { return -1; } @@ -712,15 +660,14 @@ int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) { nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if(!nvmlh->nvmlDeviceGetFanSpeed) + if (!nvmlh->nvmlDeviceGetFanSpeed) return -ENOSYS; rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt); - if(rc != NVML_SUCCESS) - { + if (rc != NVML_SUCCESS) { return -1; } @@ -732,13 +679,13 @@ int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigne { nvmlReturn_t rc; int gpuindex = hnvml->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV; - if(!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS; + if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV; + if (!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS; rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock); - if(rc != NVML_SUCCESS) return -1; + if (rc != NVML_SUCCESS) return -1; rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock); - if(rc != NVML_SUCCESS) return -1; + if (rc != NVML_SUCCESS) return -1; return 0; } @@ -747,15 +694,14 @@ int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigne int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= 
nvmlh->nvml_gpucount) return -ENODEV; - if(!nvmlh->nvmlDeviceGetPowerUsage) + if (!nvmlh->nvmlDeviceGetPowerUsage) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts); - if(res != NVML_SUCCESS) - { + if (res != NVML_SUCCESS) { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res)); return -1; @@ -768,15 +714,14 @@ int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliw int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if(!nvmlh->nvmlDeviceGetPerformanceState) + if (!nvmlh->nvmlDeviceGetPerformanceState) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate); - if(res != NVML_SUCCESS) - { + if (res != NVML_SUCCESS) { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res)); return -1; @@ -788,7 +733,7 @@ int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate) int nvml_get_busid(nvml_handle *nvmlh, int cudaindex, int *busid) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; (*busid) = nvmlh->nvml_pci_bus_id[gpuindex]; @@ -801,32 +746,30 @@ int nvml_get_serial(nvml_handle *nvmlh, int cudaindex, char *sn, int maxlen) char uuid[NVML_DEVICE_UUID_BUFFER_SIZE]; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; nvmlReturn_t res; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if(nvmlh->nvmlDeviceGetSerial) - { + if (nvmlh->nvmlDeviceGetSerial) { res = nvmlh->nvmlDeviceGetSerial(nvmlh->devs[gpuindex], sn, maxlen); - if(res == NVML_SUCCESS) + 
if (res == NVML_SUCCESS) return 0; } - if(!nvmlh->nvmlDeviceGetUUID) + if (!nvmlh->nvmlDeviceGetUUID) return -ENOSYS; // nvmlDeviceGetUUID: GPU-f2bd642c-369f-5a14-e0b4-0d22dfe9a1fc // use a part of uuid to generate an unique serial // todo: check if there is vendor id is inside memset(uuid, 0, sizeof(uuid)); - res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid) - 1); - if(res != NVML_SUCCESS) - { - if(opt_debug) + res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid)-1); + if (res != NVML_SUCCESS) { + if (opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetUUID: %s", nvmlh->nvmlErrorString(res)); return -1; } - strncpy(sn, &uuid[4], min((int)strlen(uuid), maxlen)); - sn[maxlen - 1] = '\0'; + strncpy(sn, &uuid[4], min((int) strlen(uuid), maxlen)); + sn[maxlen-1] = '\0'; return 0; } @@ -834,16 +777,15 @@ int nvml_get_bios(nvml_handle *nvmlh, int cudaindex, char *desc, int maxlen) { uint32_t subids = 0; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; - if(!nvmlh->nvmlDeviceGetVbiosVersion) + if (!nvmlh->nvmlDeviceGetVbiosVersion) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetVbiosVersion(nvmlh->devs[gpuindex], desc, maxlen); - if(res != NVML_SUCCESS) - { - if(opt_debug) + if (res != NVML_SUCCESS) { + if (opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetVbiosVersion: %s", nvmlh->nvmlErrorString(res)); return -1; } @@ -854,15 +796,15 @@ int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pi { uint32_t subids = 0; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; - if(gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; subids = nvmlh->nvml_pci_subsys_id[gpuindex]; - if(!subids) subids = nvmlh->nvml_pci_vendor_id[gpuindex]; + if (!subids) subids = nvmlh->nvml_pci_vendor_id[gpuindex]; pid = subids >> 16; vid = 
subids & 0xFFFF; // Colorful and Inno3D - if(pid == 0) pid = nvmlh->nvml_pci_vendor_id[gpuindex] >> 16; + if (pid == 0) pid = nvmlh->nvml_pci_vendor_id[gpuindex] >> 16; return 0; } @@ -889,15 +831,15 @@ int nvml_destroy(nvml_handle *nvmlh) // ---------------------------------------------------------------------------- /** -* nvapi alternative for windows x86 binaries -* nvml api doesn't exists as 32bit dll :/// -*/ + * nvapi alternative for windows x86 binaries + * nvml api doesn't exists as 32bit dll :/// + */ #ifdef WIN32 #include "nvapi/nvapi_ccminer.h" -static unsigned int nvapi_dev_map[MAX_GPUS] = {0}; -static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = {0}; -static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = {0}; +static unsigned int nvapi_dev_map[MAX_GPUS] = { 0 }; +static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; +static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvU32 nvapi_dev_cnt = 0; extern bool nvapi_dll_loaded; @@ -905,22 +847,21 @@ int nvapi_temperature(unsigned int devNum, unsigned int *temperature) { NvAPI_Status ret; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_THERMAL_SETTINGS thermal; thermal.version = NV_GPU_THERMAL_SETTINGS_VER; ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal); - if(ret != NVAPI_OK) - { + if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string); return -1; } - (*temperature) = (unsigned int)thermal.sensor[0].currentTemp; + (*temperature) = (unsigned int) thermal.sensor[0].currentTemp; return 0; } @@ -929,21 +870,20 @@ int nvapi_fanspeed(unsigned int devNum, unsigned int *speed) { NvAPI_Status ret; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; NvU32 fanspeed = 0; ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed); - if(ret != NVAPI_OK) - { + if 
(ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string); return -1; } - (*speed) = (unsigned int)fanspeed; + (*speed) = (unsigned int) fanspeed; return 0; } @@ -952,21 +892,19 @@ int nvapi_getpstate(unsigned int devNum, unsigned int *pstate) { NvAPI_Status ret; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */ ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate); - if(ret != NVAPI_OK) - { + if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetCurrentPstate: %s", string); return -1; } - else - { + else { // get pstate for the moment... often 0 = P0 (*pstate) = (unsigned int)CurrentPstate; } @@ -979,23 +917,21 @@ int nvapi_getusage(unsigned int devNum, unsigned int *pct) { NvAPI_Status ret; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_DYNAMIC_PSTATES_INFO_EX info; info.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER; ret = NvAPI_GPU_GetDynamicPstatesInfoEx(phys[devNum], &info); - if(ret != NVAPI_OK) - { + if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI GetDynamicPstatesInfoEx: %s", string); return -1; } - else - { - if(info.utilization[UTIL_DOMAIN_GPU].bIsPresent) + else { + if (info.utilization[UTIL_DOMAIN_GPU].bIsPresent) (*pct) = info.utilization[UTIL_DOMAIN_GPU].percentage; } @@ -1007,27 +943,25 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid) NvAPI_Status ret; NvU32 pDeviceId, pSubSystemId, pRevisionId, pExtDeviceId; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; ret = NvAPI_GPU_GetPCIIdentifiers(phys[devNum], &pDeviceId, &pSubSystemId, 
&pRevisionId, &pExtDeviceId); - if(ret != NVAPI_OK) - { + if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI GetPCIIdentifiers: %s", string); return -1; } pid = pDeviceId >> 16; vid = pDeviceId & 0xFFFF; - if(vid == 0x10DE && pSubSystemId) - { + if (vid == 0x10DE && pSubSystemId) { vid = pSubSystemId & 0xFFFF; pid = pSubSystemId >> 16; // Colorful and Inno3D - if(pid == 0) pid = pDeviceId >> 16; + if (pid == 0) pid = pDeviceId >> 16; } return 0; @@ -1036,45 +970,43 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid) int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen) { NvAPI_Status ret; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; memset(serial, 0, maxlen); - if(maxlen < 11) + if (maxlen < 11) return -EINVAL; - NvAPI_ShortString ser = {0}; + NvAPI_ShortString ser = { 0 }; ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser); - if(ret != NVAPI_OK) - { + if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string); return -1; } - uint8_t *bytes = (uint8_t*)ser; - for(int n = 0; n<5; n++) sprintf(&serial[n * 2], "%02X", bytes[n]); + uint8_t *bytes = (uint8_t*) ser; + for (int n=0; n<5; n++) sprintf(&serial[n*2], "%02X", bytes[n]); return 0; } int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen) { NvAPI_Status ret; - if(devNum >= nvapi_dev_cnt) + if (devNum >= nvapi_dev_cnt) return -ENODEV; - if(maxlen < 64) // Short String + if (maxlen < 64) // Short String return -1; ret = NvAPI_GPU_GetVbiosVersionString(phys[devNum], desc); - if(ret != NVAPI_OK) - { + if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI GetVbiosVersionString: %s", string); return -1; } @@ -1089,28 +1021,27 @@ static int 
SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevStat int delay1 = 20000; int delay2 = 0; - uchar4 rgb = {0}; + uchar4 rgb = { 0 }; memcpy(&rgb, &RGB, 4); - uchar4 prgb = {0}; + uchar4 prgb = { 0 }; int32_t prev = device_led_state[nvapi_devid(devNum)]; memcpy(&prgb, &prev, 4); NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); - if(i2cInfo == NULL) return -ENOMEM; + if (i2cInfo == NULL) return -ENOMEM; - NvU32 data[5] = {0}; - NvU32 datv[2] = {0, 1}; - NvU32 datw[2] = {1, 0}; - if(rgb.z != prgb.z || ignorePrevState) - { + NvU32 data[5] = { 0 }; + NvU32 datv[2] = { 0, 1 }; + NvU32 datw[2] = { 1, 0 }; + if (rgb.z != prgb.z || ignorePrevState) { data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo) data[3] = 1; datv[0] = rgb.z | 0x13384000; i2cInfo->i2cDevAddress = 0x52; - i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; - i2cInfo->pbData = (NvU8*)datv; + i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; @@ -1121,16 +1052,15 @@ static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevStat has_rgb_ok = (ret == NVAPI_OK); } - if(rgb.y != prgb.y || ignorePrevState) - { + if (rgb.y != prgb.y || ignorePrevState) { data[2] = 5; data[3] = 1; datv[0] = rgb.y | 0x4000; i2cInfo->i2cDevAddress = 0x52; - i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; - i2cInfo->pbData = (NvU8*)datv; + i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; @@ -1141,16 +1071,15 @@ static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevStat has_rgb_ok = (ret == NVAPI_OK); } - if(rgb.y != prgb.y || ignorePrevState) - { + if (rgb.y != prgb.y || ignorePrevState) { data[2] = 6; data[3] = 1; datv[0] = rgb.x | 0x4000; i2cInfo->i2cDevAddress = 0x52; - i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + 
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; - i2cInfo->pbData = (NvU8*)datv; + i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; @@ -1161,16 +1090,15 @@ static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevStat has_rgb_ok = (ret == NVAPI_OK); } - if(rgb.w && ignorePrevState) - { + if (rgb.w && ignorePrevState) { data[2] = 7; data[3] = 1; datv[0] = rgb.w | 0x4000; i2cInfo->i2cDevAddress = 0x52; - i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; - i2cInfo->pbData = (NvU8*)datv; + i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; @@ -1182,7 +1110,7 @@ static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevStat } usleep(delay2); free(i2cInfo); - return (int)ret; + return (int) ret; } static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) @@ -1190,18 +1118,18 @@ static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) NvAPI_Status ret; NV_I2C_INFO_EX* i2cInfo; NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); - if(i2cInfo == NULL) + if (i2cInfo == NULL) return -ENOMEM; - NvU32 readBuf[25] = {0}; - NvU32 data[5] = {0}; + NvU32 readBuf[25] = { 0 }; + NvU32 data[5] = { 0 }; data[0] = 1; data[2] = swab32(RGB & 0xfcfcfcU) | 0x40; i2cInfo->i2cDevAddress = 0x48 << 1; - i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS - i2cInfo->pbData = (NvU8*)readBuf; + i2cInfo->pbData = (NvU8*) readBuf; i2cInfo->cbRead = 2; i2cInfo->cbSize = sizeof(readBuf); i2cInfo->portId = 1; @@ -1211,7 +1139,7 @@ static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data); usleep(20000); free(i2cInfo); - return (int)ret; + return (int) ret; } static int SetZotacRGBLogo(unsigned int devNum, 
uint32_t RGB) @@ -1219,34 +1147,34 @@ static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) NvAPI_Status ret; NV_I2C_INFO* i2cInfo; NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo); - if(i2cInfo == NULL) + if (i2cInfo == NULL) return -ENOMEM; - NvU32 buf[25] = {0}; - NvU32 data[5] = {0}; + NvU32 buf[25] = { 0 }; + NvU32 data[5] = { 0 }; uint32_t color = 0, level = 0x40; - uchar4 rgb = {0}; + uchar4 rgb = { 0 }; memcpy(&rgb, &RGB, 4); - level = rgb.x & 0xF0; + level = rgb.x & 0xF0; level |= rgb.y & 0xF0; level |= rgb.z & 0xF0; //applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x); // Not really RGB custom, only some basic colors, so convert // 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white - if((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6; - else if((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5; - else if((RGB & 0xFF00) && (RGB & 0xFF)) color = 3; - else if((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1; - else if(RGB & 0xFF) color = 4; - else if(RGB & 0xFF00) color = 2; + if ((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6; + else if ((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5; + else if ((RGB & 0xFF00) && (RGB & 0xFF)) color = 3; + else if ((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1; + else if (RGB & 0xFF) color = 4; + else if (RGB & 0xFF00) color = 2; buf[0] = 0xF0; // F0 set colors buf[0] |= (color << 8); // logo buf[0] |= (1 << 16); // top - if(RGB != 0) // level : 0x10 to 0xF0 + if (RGB != 0) // level : 0x10 to 0xF0 buf[0] |= (level << 24); else buf[0] |= (0x10U << 24); @@ -1256,9 +1184,9 @@ static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) i2cInfo->displayMask = 1; i2cInfo->bIsDDCPort = 1; i2cInfo->i2cDevAddress = 0x48 << 1; - i2cInfo->pbI2cRegAddress = (NvU8*)(&data[2]); + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; - i2cInfo->pbData = (NvU8*)buf; + i2cInfo->pbData = (NvU8*) buf; i2cInfo->cbSize = 4; i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED; 
i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4 @@ -1271,7 +1199,7 @@ static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) #if 0 buf[0] = 0xF7; // F7 toggle leds - if(RGB == 0) + if (RGB == 0) buf[0] |= (1 << 8); // 0 logo on, 1 off buf[0] |= (1 << 16); // 1 top off ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); @@ -1283,49 +1211,41 @@ static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) // 0xF3 cycle (0x000000F3) free(i2cInfo); - return (int)ret; + return (int) ret; } int nvapi_set_led(unsigned int devNum, int RGB, char *device_name) { uint16_t vid = 0, pid = 0; NvAPI_Status ret; - if(strstr(device_name, "Gigabyte GTX 10")) - { - if(opt_debug) - applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int)phys[devNum], RGB); - return SetGigabyteRGBLogo(devNum, (uint32_t)RGB); - } - else if(strstr(device_name, "ASUS GTX 10")) - { - if(opt_debug) - applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int)phys[devNum], RGB); - return SetAsusRGBLogo(devNum, (uint32_t)RGB, !has_rgb_ok); - } - else if(strstr(device_name, "Zotac GTX 10")) - { - if(opt_debug) - applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int)phys[devNum], RGB); - return SetZotacRGBLogo(devNum, (uint32_t)RGB); - } - else - { + if (strstr(device_name, "Gigabyte GTX 10")) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); + return SetGigabyteRGBLogo(devNum, (uint32_t) RGB); + } else if (strstr(device_name, "ASUS GTX 10")) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); + return SetAsusRGBLogo(devNum, (uint32_t) RGB, !has_rgb_ok); + } else if (strstr(device_name, "Zotac GTX 10")) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); + return SetZotacRGBLogo(devNum, (uint32_t) RGB); + } else { NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu; NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu); illu->hPhysicalGpu = phys[devNum]; illu->Attribute = 
NV_GPU_IA_LOGO_BRIGHTNESS; ret = NvAPI_GPU_QueryIlluminationSupport(illu); - if(!ret && illu->bSupported) - { + if (!ret && illu->bSupported) { NV_GPU_GET_ILLUMINATION_PARM *led; NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led); led->hPhysicalGpu = phys[devNum]; led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; NvAPI_GPU_GetIllumination(led); - if(opt_debug) - applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int)phys[devNum], led->Value, RGB); - led->Value = (uint32_t)RGB; - ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*)led); + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int) phys[devNum], led->Value, RGB); + led->Value = (uint32_t) RGB; + ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*) led); free(led); } free(illu); @@ -1337,8 +1257,8 @@ int nvapi_pstateinfo(unsigned int devNum) { uint32_t n; NvAPI_Status ret; - uint32_t* mem = (uint32_t*)calloc(1, 0x4000); - if(!mem) + uint32_t* mem = (uint32_t*) calloc(1, 0x4000); + if (!mem) return -ENOMEM; unsigned int current = 0xFF; @@ -1347,19 +1267,17 @@ int nvapi_pstateinfo(unsigned int devNum) #if 0 // try :p - uint32_t* buf = (uint32_t*)calloc(1, 0x8000); - for(int i = 8; i < 0x8000 && buf; i += 4) - { + uint32_t* buf = (uint32_t*) calloc(1, 0x8000); + for (int i=8; i < 0x8000 && buf; i+=4) { buf[0] = 0x10000 + i; NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0; ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf); - if(ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) - { + if (ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string); - for(int n = 0; n < i / 32; n++) - applog_hex(&buf[n*(32 / 4)], 32); + for (int n=0; n < i/32; n++) + applog_hex(&buf[n*(32/4)], 32); break; } } @@ -1368,1098 +1286,991 @@ int nvapi_pstateinfo(unsigned int devNum) #if 0 // Unsure of the meaning of these values - NVAPI_GPU_POWER_TOPO topo = {0}; + 
NVAPI_GPU_POWER_TOPO topo = { 0 }; topo.version = NVAPI_GPU_POWER_TOPO_VER; - if((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) - { - if(topo.count) + if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) { + if (topo.count) applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?", - (double)topo.entries[0].power / 1000, (double)topo.entries[1].power / 1000); - // Ok on 970, not pascal - NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = {0}; - pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2; - pset2.ov.numVoltages = 1; - pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv; - ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2); + (double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000); + + // Ok on 970, not pascal + NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 }; + pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2; + pset2.ov.numVoltages = 1; + pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv; + ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2); #endif - NV_GPU_PERF_PSTATES20_INFO* info; - NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem); - if((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - if(opt_debug) - applog(LOG_RAW, "NVAPI GetPstates20: %s", string); - return -1; - } - - for(n = 0; n < info->numPstates; n++) - { - NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks; - applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d", - info->pstates[n].pstateId == current ? ">" : " ", (int)info->pstates[n].pstateId, - clocks[1].data.single.freq_kHz / 1000, clocks[1].bIsEditable ? "*" : " ", - (double)clocks[0].data.single.freq_kHz / 1000, clocks[0].bIsEditable ? "*" : " ", - info->pstates[n].baseVoltages[0].volt_uV / 1000, info->pstates[n].baseVoltages[0].bIsEditable ? 
"*" : " ", - info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min / 1000, // range if editable - info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max / 1000); - if(clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) - { - applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz", - clocks[1].freqDelta_kHz.value / 1000, (double)clocks[0].freqDelta_kHz.value / 1000); - } - } - // boost over volting (GTX 9xx only ?) - for(n = 0; n < info->ov.numVoltages; n++) - { - applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d", - info->ov.voltages[n].volt_uV / 1000, info->ov.voltages[n].voltDelta_uV.value / 1000, info->ov.voltages[n].bIsEditable ? "*" : " ", - info->ov.voltages[n].voltDelta_uV.valueRange.min / 1000, info->ov.voltages[n].voltDelta_uV.valueRange.max / 1000); - } - - NV_GPU_CLOCK_FREQUENCIES *freqs; - NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem); - freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); - applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", - (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, - (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - - freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); - applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", - (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, - (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - - freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); - applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", - (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, - (double)freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - - // Other clock values ?? 
- NVAPI_GPU_PERF_CLOCKS *pcl; - NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl); - int numClock = 0; ret = NVAPI_OK; - while(ret == NVAPI_OK) - { - if((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) - { - applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock, - (double)pcl->memFreq1 / 1000, (double)pcl->gpuFreq1 / 1000, (double)pcl->gpuFreqMin / 1000, (double)pcl->gpuFreqMax / 1000); - // ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error - } - numClock++; - } - - // Pascal only - NVAPI_VOLTBOOST_PERCENT *pvb; - NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem); - if((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) - { - NVAPI_VOLTAGE_STATUS *pvdom; - NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom); - NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom); - if(pvdom && pvdom->value_uV) - applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV / 1000, pvb->percent); - else if(pvdom) - applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV / 1000); - free(pvdom); - } - else - { - // Maxwell 9xx - NVAPI_VOLT_STATUS *mvdom, *mvstep; - NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom); - if(mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) - { - NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep); - NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep); - if(mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution", - (double)mvdom->value_uV / 1000, (double)mvstep->value_uV / 1000); - free(mvstep); - } - free(mvdom); - } - - uint32_t plim = nvapi_get_plimit(devNum); - double min_pw = 0, max_pw = 0; // percent + NV_GPU_PERF_PSTATES20_INFO* info; + NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem); + if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if (opt_debug) + applog(LOG_RAW, "NVAPI GetPstates20: %s", string); + return 
-1; + } - NVAPI_GPU_POWER_INFO nfo = {0}; - nfo.version = NVAPI_GPU_POWER_INFO_VER; - ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); - if(ret == NVAPI_OK && nfo.valid) - { - min_pw = (double)nfo.entries[0].min_power / 1000; - max_pw = (double)nfo.entries[0].max_power / 1000; - } - applog(LOG_RAW, " Power limit is set to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw); + for (n=0; n < info->numPstates; n++) { + NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks; + applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d", + info->pstates[n].pstateId == current ? ">":" ", (int) info->pstates[n].pstateId, + clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ", + (double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ", + info->pstates[n].baseVoltages[0].volt_uV/1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*": " ", + info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable + info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000); + if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) { + applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz", + clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000); + } + } + // boost over volting (GTX 9xx only ?) + for (n=0; n < info->ov.numVoltages; n++) { + applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d", + info->ov.voltages[n].volt_uV/1000, info->ov.voltages[n].voltDelta_uV.value/1000, info->ov.voltages[n].bIsEditable ? 
"*":" ", + info->ov.voltages[n].voltDelta_uV.valueRange.min/1000, info->ov.voltages[n].voltDelta_uV.valueRange.max/1000); + } + + NV_GPU_CLOCK_FREQUENCIES *freqs; + NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem); + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + // Other clock values ?? 
+ NVAPI_GPU_PERF_CLOCKS *pcl; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl); + int numClock=0; ret = NVAPI_OK; + while (ret == NVAPI_OK) { + if ((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) { + applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock, + (double) pcl->memFreq1/1000, (double) pcl->gpuFreq1/1000, (double) pcl->gpuFreqMin/1000, (double) pcl->gpuFreqMax/1000); + // ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error + } + numClock++; + } + + // Pascal only + NVAPI_VOLTBOOST_PERCENT *pvb; + NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem); + if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) { + NVAPI_VOLTAGE_STATUS *pvdom; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom); + NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom); + if (pvdom && pvdom->value_uV) + applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV/1000, pvb->percent); + else if (pvdom) + applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV/1000); + free(pvdom); + } else { + // Maxwell 9xx + NVAPI_VOLT_STATUS *mvdom, *mvstep; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom); + if (mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) { + NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep); + NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep); + if (mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution", + (double) mvdom->value_uV/1000, (double) mvstep->value_uV/1000); + free(mvstep); + } + free(mvdom); + } + + uint32_t plim = nvapi_get_plimit(devNum); + double min_pw = 0, max_pw = 0; // percent + + NVAPI_GPU_POWER_INFO nfo = { 0 }; + nfo.version = NVAPI_GPU_POWER_INFO_VER; + ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); + if (ret == NVAPI_OK && nfo.valid) { + min_pw = (double)nfo.entries[0].min_power / 1000; + max_pw = (double)nfo.entries[0].max_power / 1000; + } + applog(LOG_RAW, " Power limit is set 
to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw); #if 0 - NVAPI_COOLER_SETTINGS *cooler; - NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem); - ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler); - if(ret == NVAPI_OK) - { - applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?) - NVAPI_COOLER_LEVEL *fan; - NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan); - fan->level = 100; - fan->count = 1; - ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan); - free(fan); - sleep(10); - ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7); - } + NVAPI_COOLER_SETTINGS *cooler; + NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem); + ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler); + if (ret == NVAPI_OK) { + applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?) + NVAPI_COOLER_LEVEL *fan; + NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan); + fan->level = 100; + fan->count = 1; + ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan); + free(fan); + sleep(10); + ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7); + } #endif - NV_GPU_THERMAL_SETTINGS *tset; - NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem); + NV_GPU_THERMAL_SETTINGS *tset; + NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem); - NVAPI_GPU_THERMAL_INFO *tnfo; - NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo); - NVAPI_GPU_THERMAL_LIMIT *tlim; - NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim); - NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset); - NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo); - if((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) - { - applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", - tlim->entries[0].value >> 8, tset->sensor[0].currentTemp, - tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8); - } - free(tnfo); - free(tlim); + NVAPI_GPU_THERMAL_INFO *tnfo; + 
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo); + NVAPI_GPU_THERMAL_LIMIT *tlim; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim); + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset); + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo); + if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) { + applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", + tlim->entries[0].value >> 8, tset->sensor[0].currentTemp, + tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8); + } + free(tnfo); + free(tlim); #if 1 - // Read pascal Clocks Table, Empty on 9xx - //NVAPI_CLOCKS_RANGE* ranges; - //NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem); - //ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges); - - NVAPI_CLOCK_MASKS* boost; - NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem); - ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost); - int gpuClocks = 0, memClocks = 0; - for(n = 0; n < 80 + 23; n++) - { - if(boost->clocks[n].memDelta) memClocks++; - if(boost->clocks[n].gpuDelta) gpuClocks++; - } - - // PASCAL GTX ONLY - if(gpuClocks || memClocks) - { - NVAPI_CLOCK_TABLE *table; - NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table); - memcpy(table->mask, boost->mask, 12); - ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table); - gpuClocks = 0, memClocks = 0; - for(n = 0; n < 12; n++) - { - if(table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]); - } - for(n = 0; n < 80; n++) - { - if(table->gpuDeltas[n].freqDelta) - { - // note: gpu delta value seems to be x2, not the memory - //applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000); - gpuClocks++; - } - } - for(n = 0; n < 23; n++) - { - if(table->memFilled[n]) - { - //applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000); - memClocks++; - } - } - for(n = 0; n < 1529; n++) - { - if(table->buf1[n] != 0) applog(LOG_RAW, "boost 
table 1[%u] not empty (%u)", n, table->buf1[n]); - } - applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); - free(table); - - NVAPI_VFP_CURVE *curve; - NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve); - memcpy(curve->mask, boost->mask, 12); - ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve); - gpuClocks = 0, memClocks = 0; - for(n = 0; n < 80; n++) - { - if(curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) - { - // applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000); - gpuClocks++; - } + // Read pascal Clocks Table, Empty on 9xx + //NVAPI_CLOCKS_RANGE* ranges; + //NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem); + //ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges); + + NVAPI_CLOCK_MASKS* boost; + NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem); + ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost); + int gpuClocks = 0, memClocks = 0; + for (n=0; n < 80+23; n++) { + if (boost->clocks[n].memDelta) memClocks++; + if (boost->clocks[n].gpuDelta) gpuClocks++; + } + + // PASCAL GTX ONLY + if (gpuClocks || memClocks) { + NVAPI_CLOCK_TABLE *table; + NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table); + memcpy(table->mask, boost->mask, 12); + ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table); + gpuClocks = 0, memClocks = 0; + for (n=0; n < 12; n++) { + if (table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]); + } + for (n=0; n < 80; n++) { + if (table->gpuDeltas[n].freqDelta) { + // note: gpu delta value seems to be x2, not the memory + //applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000); + gpuClocks++; } - for(n = 0; n < 23; n++) - { - if(curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) - { - // applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000); - 
memClocks++; - } + } + for (n=0; n < 23; n++) { + if (table->memFilled[n]) { + //applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000); + memClocks++; } - for(n = 0; n < 1064; n++) - { - if(curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]); + } + for (n=0; n < 1529; n++) { + if (table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]); + } + applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + free(table); + + NVAPI_VFP_CURVE *curve; + NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve); + memcpy(curve->mask, boost->mask, 12); + ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve); + gpuClocks = 0, memClocks = 0; + for (n=0; n < 80; n++) { + if (curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) { + // applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000); + gpuClocks++; } - applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); - free(curve); } - - // Maxwell - else - { - NVAPI_VOLTAGES_TABLE* volts; - NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts); - int entries = 0; - ret = NvAPI_DLL_GetVoltages(phys[devNum], volts); - for(n = 0; n < 128; n++) - { - if(volts->entries[n].volt_uV) - entries++; + for (n=0; n < 23; n++) { + if (curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) { + // applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000); + memClocks++; } - applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); - free(volts); } + for (n=0; n < 1064; n++) { + if (curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]); + } + applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + free(curve); + } - NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo; 
- NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem); - meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; - if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) - { - applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory / 1024, - (double)(meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory) / 1024); + // Maxwell + else { + NVAPI_VOLTAGES_TABLE* volts; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts); + int entries = 0; + ret = NvAPI_DLL_GetVoltages(phys[devNum], volts); + for (n=0; n < 128; n++) { + if (volts->entries[n].volt_uV) + entries++; } + applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); + free(volts); + } + + NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo; + NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem); + meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; + if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) { + applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory/1024, + (double) (meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory)/1024); + } #if 0 /* some undetermined stats */ - NVAPI_GPU_PERF_INFO pi = {0}; - pi.version = NVAPI_GPU_PERF_INFO_VER; - ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi); - NVAPI_GPU_PERF_STATUS ps = {0}; - ps.version = NVAPI_GPU_PERF_STATUS_VER; - ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps); - applog(LOG_BLUE, "%llx %lld. %lld. %llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]); + NVAPI_GPU_PERF_INFO pi = { 0 }; + pi.version = NVAPI_GPU_PERF_INFO_VER; + ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi); + + NVAPI_GPU_PERF_STATUS ps = { 0 }; + ps.version = NVAPI_GPU_PERF_STATUS_VER; + ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps); + applog(LOG_BLUE, "%llx %lld. %lld. 
%llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]); #endif #endif - free(mem); - return 0; - } + free(mem); + return 0; +} - // workaround for buggy driver 378.49 - unsigned int nvapi_get_gpu_clock(unsigned int devNum) - { - NvAPI_Status ret = NVAPI_OK; - unsigned int freq = 0; - NV_GPU_CLOCK_FREQUENCIES *freqs; - NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs); - freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); - if(ret == NVAPI_OK) - { - freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000; - } - free(freqs); - return freq; // in MHz +// workaround for buggy driver 378.49 +unsigned int nvapi_get_gpu_clock(unsigned int devNum) +{ + NvAPI_Status ret = NVAPI_OK; + unsigned int freq = 0; + NV_GPU_CLOCK_FREQUENCIES *freqs; + NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs); + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + if (ret == NVAPI_OK) { + freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000; + } + free(freqs); + return freq; // in MHz +} + +uint8_t nvapi_get_plimit(unsigned int devNum) +{ + NvAPI_Status ret = NVAPI_OK; + NVAPI_GPU_POWER_STATUS pol = { 0 }; + pol.version = NVAPI_GPU_POWER_STATUS_VER; + if ((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if (opt_debug) + applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); + return 0; } + return (uint8_t) (pol.entries[0].power / 1000); // in percent +} - uint8_t nvapi_get_plimit(unsigned int devNum) - { - NvAPI_Status ret = NVAPI_OK; - NVAPI_GPU_POWER_STATUS pol = {0}; - pol.version = NVAPI_GPU_POWER_STATUS_VER; - if((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - 
if(opt_debug) - applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); - return 0; - } - return (uint8_t)(pol.entries[0].power / 1000); // in percent +int nvapi_set_plimit(unsigned int devNum, uint16_t percent) +{ + NvAPI_Status ret = NVAPI_OK; + uint32_t val = percent * 1000; + + NVAPI_GPU_POWER_INFO nfo = { 0 }; + nfo.version = NVAPI_GPU_POWER_INFO_VER; + ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); + if (ret == NVAPI_OK) { + if (val == 0) + val = nfo.entries[0].def_power; + else if (val < nfo.entries[0].min_power) + val = nfo.entries[0].min_power; + else if (val > nfo.entries[0].max_power) + val = nfo.entries[0].max_power; + } + + NVAPI_GPU_POWER_STATUS pol = { 0 }; + pol.version = NVAPI_GPU_POWER_STATUS_VER; + pol.flags = 1; + pol.entries[0].power = val; + if ((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if (opt_debug) + applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); + return -1; } + return ret; +} - int nvapi_set_plimit(unsigned int devNum, uint16_t percent) - { - NvAPI_Status ret = NVAPI_OK; - uint32_t val = percent * 1000; +int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) +{ + NvAPI_Status ret; + uint32_t val = limit; - NVAPI_GPU_POWER_INFO nfo = {0}; - nfo.version = NVAPI_GPU_POWER_INFO_VER; - ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); - if(ret == NVAPI_OK) - { - if(val == 0) - val = nfo.entries[0].def_power; - else if(val < nfo.entries[0].min_power) - val = nfo.entries[0].min_power; - else if(val > nfo.entries[0].max_power) - val = nfo.entries[0].max_power; - } + if (devNum >= nvapi_dev_cnt) + return -ENODEV; - NVAPI_GPU_POWER_STATUS pol = {0}; - pol.version = NVAPI_GPU_POWER_STATUS_VER; - pol.flags = 1; - pol.entries[0].power = val; - if((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) - { + NV_GPU_THERMAL_SETTINGS tset = { 0 }; + 
NVAPI_GPU_THERMAL_INFO tnfo = { 0 }; + NVAPI_GPU_THERMAL_LIMIT tlim = { 0 }; + tset.version = NV_GPU_THERMAL_SETTINGS_VER; + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); + tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); + tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; + if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) { + tlim.entries[0].value = val << 8; + tlim.flags = 1; + ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", + devNum, val, tset.sensor[0].currentTemp, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } else { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) - applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); - return -1; - } - return ret; - } - - int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) - { - NvAPI_Status ret; - uint32_t val = limit; - - if(devNum >= nvapi_dev_cnt) - return -ENODEV; - - NV_GPU_THERMAL_SETTINGS tset = {0}; - NVAPI_GPU_THERMAL_INFO tnfo = {0}; - NVAPI_GPU_THERMAL_LIMIT tlim = {0}; - tset.version = NV_GPU_THERMAL_SETTINGS_VER; - NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); - tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; - NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); - tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; - if((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) - { - tlim.entries[0].value = val << 8; - tlim.flags = 1; - ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); - if(ret == NVAPI_OK) - { - applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", - devNum, val, tset.sensor[0].currentTemp, - tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); - } - else - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); 
- applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, - tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); - } + applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); } - return (int)ret; } + return (int) ret; +} - int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) - { - NvAPI_Status ret; - NvS32 delta = 0; +int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) +{ + NvAPI_Status ret; + NvS32 delta = 0; - if(devNum >= nvapi_dev_cnt) - return -ENODEV; + if (devNum >= nvapi_dev_cnt) + return -ENODEV; #if 0 - // wrong api to get default base clock when modified, cuda props seems fine - NV_GPU_CLOCK_FREQUENCIES freqs = {0}; - freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; - freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); - if(ret == NVAPI_OK) - { - delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; - } - NV_GPU_PERF_PSTATES_INFO deffreqs = {0}; - deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; - ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! 
- if(ret == NVAPI_OK) - { - if(deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) - delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq * 2; - } + // wrong api to get default base clock when modified, cuda props seems fine + NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); + if (ret == NVAPI_OK) { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; + } + + NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! + if (ret == NVAPI_OK) { + if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2; + } #endif - cudaDeviceProp props = {0}; - NvU32 busId = 0xFFFF; - ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); - for(int d = 0; d < (int)nvapi_dev_cnt; d++) - { - // unsure about devNum, so be safe - cudaGetDeviceProperties(&props, d); - if(props.pciBusID == busId) - { - delta = (clock * 1000) - props.clockRate; - break; - } + cudaDeviceProp props = { 0 }; + NvU32 busId = 0xFFFF; + ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); + for (int d=0; d < (int) nvapi_dev_cnt; d++) { + // unsure about devNum, so be safe + cudaGetDeviceProperties(&props, d); + if (props.pciBusID == busId) { + delta = (clock * 1000) - props.clockRate; + break; } + } - if(delta == (clock * 1000)) - return ret; - - NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = {0}; - pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; - pset1.numPstates = 1; - pset1.numClocks = 1; - // Ok on both 1080 and 970 - pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; - pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; - ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); 
- if(ret == NVAPI_OK) - { - applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta / 1000); - } + if (delta == (clock * 1000)) return ret; - } - int nvapi_set_memclock(unsigned int devNum, uint32_t clock) - { - NvAPI_Status ret; - NvS32 delta = 0; - - if(devNum >= nvapi_dev_cnt) - return -ENODEV; - - // wrong to get default base clock (when modified) on maxwell (same as cuda props one) - NV_GPU_CLOCK_FREQUENCIES freqs = {0}; - freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; - freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless - if(ret == NVAPI_OK) - { - delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; - } + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + // Ok on both 1080 and 970 + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000); + } + return ret; +} - // seems ok on maxwell and pascal for the mem clocks - NV_GPU_PERF_PSTATES_INFO deffreqs = {0}; - deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; - ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks - if(ret == NVAPI_OK) - { - if(deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) - delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; - } +int nvapi_set_memclock(unsigned int devNum, uint32_t clock) +{ + NvAPI_Status ret; + NvS32 delta = 0; - if(delta == (clock * 1000)) - return ret; + if (devNum >= nvapi_dev_cnt) + return -ENODEV; - // todo: bounds check with GetPstates20 + // wrong to get default base clock (when modified) on maxwell 
(same as cuda props one) + NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless + if (ret == NVAPI_OK) { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; + } - NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = {0}; - pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; - pset1.numPstates = 1; - pset1.numClocks = 1; - pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; - pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; - ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); - if(ret == NVAPI_OK) - { - applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta / 1000); - } - return ret; + // seems ok on maxwell and pascal for the mem clocks + NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks + if (ret == NVAPI_OK) { + if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; } - static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log = true) - { - NvAPI_Status ret; - NvS32 deltaKHz = delta * 1000; - - if(devNum >= nvapi_dev_cnt) - return -ENODEV; - - // todo: bounds check with GetPstates20 - - NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = {0}; - pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; - pset1.numPstates = 1; - pset1.numClocks = 1; - pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; - pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz; - ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); - if(ret == NVAPI_OK) - { - if(log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000); - need_memclockrst = true; - } + 
if (delta == (clock * 1000)) return ret; + + // todo: bounds check with GetPstates20 + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000); } + return ret; +} - // Replacement for WIN32 CUDA 6.5 on pascal - int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) - { - NvAPI_Status ret = NVAPI_OK; - NV_DISPLAY_DRIVER_MEMORY_INFO mem = {0}; - mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; - unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; - if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) - { - *total = (uint64_t)mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; - *free = (uint64_t)mem.curAvailableDedicatedVideoMemory; - } - return (int)ret; +static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log=true) +{ + NvAPI_Status ret; + NvS32 deltaKHz = delta * 1000; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + // todo: bounds check with GetPstates20 + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + if (log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000); + need_memclockrst = true; } + return ret; +} - int nvapi_init() - { - int num_gpus = cuda_num_devices(); - NvAPI_Status ret = NvAPI_Initialize(); - if(ret != NVAPI_OK) - { - NvAPI_ShortString string; - 
NvAPI_GetErrorMessage(ret, string); - if(opt_debug) - applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); - return -1; - } +// Replacement for WIN32 CUDA 6.5 on pascal +int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) +{ + NvAPI_Status ret = NVAPI_OK; + NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 }; + mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; + unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; + if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) { + *total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; + *free = (uint64_t) mem.curAvailableDedicatedVideoMemory; + } + return (int) ret; +} - ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); - if(ret != NVAPI_OK) - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - if(opt_debug) - applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); - return -1; - } +int nvapi_init() +{ + int num_gpus = cuda_num_devices(); + NvAPI_Status ret = NvAPI_Initialize(); + if (ret != NVAPI_OK) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if (opt_debug) + applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); + return -1; + } - for(int g = 0; g < num_gpus; g++) - { - cudaDeviceProp props; - if(cudaGetDeviceProperties(&props, g) == cudaSuccess) - { - device_bus_ids[g] = props.pciBusID; - } - nvapi_dev_map[g] = g; // default mapping - } + ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); + if (ret != NVAPI_OK) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + if (opt_debug) + applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); + return -1; + } - for(NvU8 i = 0; i < nvapi_dev_cnt; i++) - { - NvAPI_ShortString name; - ret = NvAPI_GPU_GetFullName(phys[i], name); - if(ret == NVAPI_OK) - { - for(int g = 0; g < num_gpus; g++) - { - NvU32 busId; - ret = NvAPI_GPU_GetBusId(phys[i], &busId); - if(ret == NVAPI_OK && busId == device_bus_ids[g]) - { - nvapi_dev_map[g] = i; - if(opt_debug) - 
applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u", - g, i, busId); - break; - } + for (int g = 0; g < num_gpus; g++) { + cudaDeviceProp props; + if (cudaGetDeviceProperties(&props, g) == cudaSuccess) { + device_bus_ids[g] = props.pciBusID; + } + nvapi_dev_map[g] = g; // default mapping + } + + for (NvU8 i = 0; i < nvapi_dev_cnt; i++) { + NvAPI_ShortString name; + ret = NvAPI_GPU_GetFullName(phys[i], name); + if (ret == NVAPI_OK) { + for (int g = 0; g < num_gpus; g++) { + NvU32 busId; + ret = NvAPI_GPU_GetBusId(phys[i], &busId); + if (ret == NVAPI_OK && busId == device_bus_ids[g]) { + nvapi_dev_map[g] = i; + if (opt_debug) + applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u", + g, i, busId); + break; } } - else - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage(ret, string); - applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); - } + } else { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); } + } #if 0 - if(opt_debug) - { - NvAPI_ShortString ver; - NvAPI_GetInterfaceVersionString(ver); - applog(LOG_DEBUG, "%s", ver); - } + if (opt_debug) { + NvAPI_ShortString ver; + NvAPI_GetInterfaceVersionString(ver); + applog(LOG_DEBUG, "%s", ver); + } #endif - NvU32 udv; - NvAPI_ShortString str; - ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str); - if(ret == NVAPI_OK) - { - sprintf(driver_version, "%d.%02d", udv / 100, udv % 100); - } - - return 0; + NvU32 udv; + NvAPI_ShortString str; + ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str); + if (ret == NVAPI_OK) { + sprintf(driver_version,"%d.%02d", udv / 100, udv % 100); } - int nvapi_init_settings() - { - // nvapi.dll - int ret = nvapi_dll_init(); - if(ret != NVAPI_OK) - return ret; + return 0; +} - if(!opt_n_threads) - { - opt_n_threads = active_gpus; - } +int nvapi_init_settings() +{ + // nvapi.dll + int ret = nvapi_dll_init(); + if (ret != NVAPI_OK) + return ret; - for(int n = 0; n < 
opt_n_threads; n++) - { - int dev_id = device_map[n % MAX_GPUS]; - if(device_plimit[dev_id] && !nvml_plimit_set) - { - if(nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) - { - uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]); - gpulog(LOG_INFO, n, "Power limit is set to %u%%", res); - } - } - if(device_tlimit[dev_id]) - { - nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]); - } - if(device_gpu_clocks[dev_id]) - { - ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]); - if(ret) - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage((NvAPI_Status)ret, string); - gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string); - } + if (!opt_n_threads) { + opt_n_threads = active_gpus; + } + + for (int n=0; n < opt_n_threads; n++) { + int dev_id = device_map[n % MAX_GPUS]; + if (device_plimit[dev_id] && !nvml_plimit_set) { + if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) { + uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]); + gpulog(LOG_INFO, n, "Power limit is set to %u%%", res); } - if(device_mem_offsets[dev_id]) - { - ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]); - if(ret) - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage((NvAPI_Status)ret, string); - gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string); - } + } + if (device_tlimit[dev_id]) { + nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]); + } + if (device_gpu_clocks[dev_id]) { + ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]); + if (ret) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status) ret, string); + gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string); } - else if(device_mem_clocks[dev_id]) - { - ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]); - if(ret) - { - NvAPI_ShortString string; - NvAPI_GetErrorMessage((NvAPI_Status)ret, string); - gpulog(LOG_WARNING, n, 
"nvapi_set_memclock %s", string); - } + } + if (device_mem_offsets[dev_id]) { + ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]); + if (ret) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status)ret, string); + gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string); } - if(device_pstate[dev_id]) - { - // dunno how via nvapi or/and pascal + } + else if (device_mem_clocks[dev_id]) { + ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]); + if (ret) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status) ret, string); + gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string); } - if(device_led[dev_id] != -1) - { - int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]); - if(err != 0) - { - gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err); - } - device_led_state[dev_id] = device_led[dev_id]; + } + if (device_pstate[dev_id]) { + // dunno how via nvapi or/and pascal + } + if (device_led[dev_id] != -1) { + int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]); + if (err != 0) { + gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err); } + device_led_state[dev_id] = device_led[dev_id]; } - - return ret; } - void nvapi_toggle_clocks(int thr_id, bool enable) - { - int dev_id = device_map[thr_id % MAX_GPUS]; - if(device_mem_offsets[dev_id]) - { - nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false); - } - } + return ret; +} - unsigned int nvapi_devnum(int dev_id) - { - return nvapi_dev_map[dev_id]; +void nvapi_toggle_clocks(int thr_id, bool enable) +{ + int dev_id = device_map[thr_id % MAX_GPUS]; + if (device_mem_offsets[dev_id]) { + nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? 
device_mem_offsets[dev_id] : 0, false); } +} - int nvapi_devid(unsigned int devNum) - { - for(int i = 0; i < opt_n_threads; i++) - { - int dev_id = device_map[i % MAX_GPUS]; - if(nvapi_dev_map[dev_id] = devNum) - return dev_id; - } - return 0; +unsigned int nvapi_devnum(int dev_id) +{ + return nvapi_dev_map[dev_id]; +} + +int nvapi_devid(unsigned int devNum) +{ + for (int i=0; i < opt_n_threads; i++) { + int dev_id = device_map[i % MAX_GPUS]; + if (nvapi_dev_map[dev_id] == devNum) + return dev_id; } + return 0; +} #endif /* WIN32 : Windows specific (nvapi) */ - /* api functions -------------------------------------- */ +/* api functions -------------------------------------- */ - // assume 2500 rpm as default, auto-updated if more - static unsigned int fan_speed_max = 2500; +// assume 2500 rpm as default, auto-updated if more +static unsigned int fan_speed_max = 2500; - unsigned int gpu_fanpercent(struct cgpu_info *gpu) - { - unsigned int pct = 0; - if(hnvml) - { - nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct); - } +unsigned int gpu_fanpercent(struct cgpu_info *gpu) +{ + unsigned int pct = 0; + if (hnvml) { + nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct); + } #ifdef WIN32 - else - { - unsigned int rpm = 0; - nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); - pct = (rpm * 100) / fan_speed_max; - if(pct > 100) - { - pct = 100; - fan_speed_max = rpm; - } + else { + unsigned int rpm = 0; + nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); + pct = (rpm * 100) / fan_speed_max; + if (pct > 100) { + pct = 100; + fan_speed_max = rpm; } -#endif - return pct; } +#endif + return pct; +} - unsigned int gpu_fanrpm(struct cgpu_info *gpu) - { - unsigned int rpm = 0; +unsigned int gpu_fanrpm(struct cgpu_info *gpu) +{ + unsigned int rpm = 0; #ifdef WIN32 - nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); + nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); #endif - return rpm; - } + return rpm; +} - float gpu_temp(struct cgpu_info *gpu) - { - float tc = 0.0; - unsigned int tmp = 0; - 
if(hnvml) - { - nvml_get_tempC(hnvml, gpu->gpu_id, &tmp); - tc = (float)tmp; - } +float gpu_temp(struct cgpu_info *gpu) +{ + float tc = 0.0; + unsigned int tmp = 0; + if (hnvml) { + nvml_get_tempC(hnvml, gpu->gpu_id, &tmp); + tc = (float)tmp; + } #ifdef WIN32 - else - { - nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp); - tc = (float)tmp; - } -#endif - return tc; + else { + nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp); + tc = (float)tmp; } +#endif + return tc; +} - int gpu_pstate(struct cgpu_info *gpu) - { - int pstate = -1; - int support = -1; - if(hnvml) - { - support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate); - } +int gpu_pstate(struct cgpu_info *gpu) +{ + int pstate = -1; + int support = -1; + if (hnvml) { + support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate); + } #ifdef WIN32 - if(support == -1) - { - unsigned int pst = 0; - nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst); - pstate = (int)pst; - } -#endif - return pstate; + if (support == -1) { + unsigned int pst = 0; + nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst); + pstate = (int) pst; } +#endif + return pstate; +} - int gpu_busid(struct cgpu_info *gpu) - { - int busid = -1; - int support = -1; - if(hnvml) - { - support = nvml_get_busid(hnvml, gpu->gpu_id, &busid); - } +int gpu_busid(struct cgpu_info *gpu) +{ + int busid = -1; + int support = -1; + if (hnvml) { + support = nvml_get_busid(hnvml, gpu->gpu_id, &busid); + } #ifdef WIN32 - if(support == -1) - { - busid = device_bus_ids[gpu->gpu_id]; - } -#endif - return busid; + if (support == -1) { + busid = device_bus_ids[gpu->gpu_id]; } +#endif + return busid; +} - unsigned int gpu_power(struct cgpu_info *gpu) - { - unsigned int mw = 0; - int support = -1; - if(hnvml) - { - support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw); - } +unsigned int gpu_power(struct cgpu_info *gpu) +{ + unsigned int mw = 0; + int support = -1; + if (hnvml) { + support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw); + } #ifdef WIN32 - if(support == -1) - 
{ - unsigned int pct = 0; - nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); - pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); - pct /= 100; - mw = pct; // to fix - } + if (support == -1) { + unsigned int pct = 0; + nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); + pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); + pct /= 100; + mw = pct; // to fix + } #endif - if(gpu->gpu_power > 0) - { - // average - mw = (gpu->gpu_power + mw) / 2; - } - return mw; + if (gpu->gpu_power > 0) { + // average + mw = (gpu->gpu_power + mw) / 2; } + return mw; +} - unsigned int gpu_plimit(struct cgpu_info *gpu) - { - unsigned int mw = 0; - int support = -1; - if(hnvml) - { - mw = nvml_get_plimit(hnvml, gpu->gpu_id); - support = (mw > 0); - } +unsigned int gpu_plimit(struct cgpu_info *gpu) +{ + unsigned int mw = 0; + int support = -1; + if (hnvml) { + mw = nvml_get_plimit(hnvml, gpu->gpu_id); + support = (mw > 0); + } #ifdef WIN32 - // NVAPI value is in % (< 100 so) - if(support == -1) - { - mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); - } -#endif - return mw; + // NVAPI value is in % (< 100 so) + if (support == -1) { + mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); } +#endif + return mw; +} - static int translate_vendor_id(uint16_t vid, char *vendorname) - { - struct VENDORS - { - const uint16_t vid; - const char *name; - } vendors[] = { - {0x1043, "ASUS"}, - {0x1048, "Elsa"}, - {0x107D, "Leadtek"}, - {0x10B0, "Gainward"}, - // { 0x10DE, "NVIDIA" }, - {0x1458, "Gigabyte"}, - {0x1462, "MSI"}, - {0x154B, "PNY"}, // maybe storage devices - {0x1569, "Palit"}, - {0x1682, "XFX"}, - {0x196D, "Club3D"}, - {0x196E, "PNY"}, - {0x19DA, "Zotac"}, - {0x19F1, "BFG"}, - {0x1ACC, "PoV"}, - {0x1B4C, "Galax"}, // KFA2 in EU, to check on Pascal cards - {0x3842, "EVGA"}, - {0x7377, "Colorful"}, - {0, ""} - }; - - if(!vendorname) - return -EINVAL; - - for(int v = 0; v < ARRAY_SIZE(vendors); v++) - { - if(vid == vendors[v].vid) - { - strcpy(vendorname, vendors[v].name); - return vid; - 
} +static int translate_vendor_id(uint16_t vid, char *vendorname) +{ + struct VENDORS { + const uint16_t vid; + const char *name; + } vendors[] = { + { 0x1043, "ASUS" }, + { 0x1048, "Elsa" }, + { 0x107D, "Leadtek" }, + { 0x10B0, "Gainward" }, + // { 0x10DE, "NVIDIA" }, + { 0x1458, "Gigabyte" }, + { 0x1462, "MSI" }, + { 0x154B, "PNY" }, // maybe storage devices + { 0x1569, "Palit" }, + { 0x1682, "XFX" }, + { 0x196D, "Club3D" }, + { 0x196E, "PNY" }, + { 0x19DA, "Zotac" }, + { 0x19F1, "BFG" }, + { 0x1ACC, "PoV" }, + { 0x1B4C, "Galax" }, // KFA2 in EU, to check on Pascal cards + { 0x3842, "EVGA" }, + { 0x7377, "Colorful" }, + { 0, "" } + }; + + if (!vendorname) + return -EINVAL; + + for(int v=0; v < ARRAY_SIZE(vendors); v++) { + if (vid == vendors[v].vid) { + strcpy(vendorname, vendors[v].name); + return vid; } - if(opt_debug && vid != 0x10DE) - applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid); - return 0; } + if (opt_debug && vid != 0x10DE) + applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid); + return 0; +} - int gpu_vendor(uint8_t pci_bus_id, char *vendorname) - { - uint16_t vid = 0, pid = 0; - if(hnvml) - { // may not be initialized on start... - for(int id = 0; id < hnvml->nvml_gpucount; id++) - { - if(hnvml->nvml_pci_bus_id[id] == pci_bus_id) - { - int dev_id = hnvml->nvml_cuda_device_id[id]; - nvml_get_info(hnvml, dev_id, vid, pid); - } +int gpu_vendor(uint8_t pci_bus_id, char *vendorname) +{ + uint16_t vid = 0, pid = 0; + if (hnvml) { // may not be initialized on start... 
+ for (int id=0; id < hnvml->nvml_gpucount; id++) { + if (hnvml->nvml_pci_bus_id[id] == pci_bus_id) { + int dev_id = hnvml->nvml_cuda_device_id[id]; + nvml_get_info(hnvml, dev_id, vid, pid); } } - else - { + } else { #ifdef WIN32 - for(unsigned id = 0; id < nvapi_dev_cnt; id++) - { - if(device_bus_ids[id] == pci_bus_id) - { - nvapi_getinfo(nvapi_dev_map[id], vid, pid); - break; - } + for (unsigned id = 0; id < nvapi_dev_cnt; id++) { + if (device_bus_ids[id] == pci_bus_id) { + nvapi_getinfo(nvapi_dev_map[id], vid, pid); + break; } -#endif } - return translate_vendor_id(vid, vendorname); +#endif } + return translate_vendor_id(vid, vendorname); +} - int gpu_info(struct cgpu_info *gpu) - { - char vendorname[32] = {0}; - int id = gpu->gpu_id; - uint8_t bus_id = 0; +int gpu_info(struct cgpu_info *gpu) +{ + char vendorname[32] = { 0 }; + int id = gpu->gpu_id; + uint8_t bus_id = 0; - gpu->nvml_id = -1; - gpu->nvapi_id = -1; + gpu->nvml_id = -1; + gpu->nvapi_id = -1; - if(id < 0) - return -1; + if (id < 0) + return -1; - if(hnvml) - { - gpu->nvml_id = (int8_t)hnvml->cuda_nvml_device_id[id]; - nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid); - nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn)); - nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc)); - } + if (hnvml) { + gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[id]; + nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid); + nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn)); + nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc)); + } #ifdef WIN32 - gpu->nvapi_id = (int8_t)nvapi_dev_map[id]; - nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid); - nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, sizeof(gpu->gpu_sn)); - nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc)); + gpu->nvapi_id = (int8_t) nvapi_dev_map[id]; + nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid); + nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, 
sizeof(gpu->gpu_sn)); + nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc)); #endif - return 0; - } + return 0; +} #endif /* USE_WRAPNVML */ - static int rgb_percent(int RGB, int percent) - { - uint8_t* comp = (uint8_t*)&RGB; - int res = ((percent*comp[2]) / 100) << 16; - res += ((percent*comp[1]) / 100) << 8; - return res + ((percent*comp[0]) / 100); - } +static int rgb_percent(int RGB, int percent) +{ + uint8_t* comp = (uint8_t*) &RGB; + int res = ((percent*comp[2]) / 100) << 16; + res += ((percent*comp[1]) / 100) << 8; + return res + ((percent*comp[0]) / 100); +} - void gpu_led_on(int dev_id) - { +void gpu_led_on(int dev_id) +{ #if defined(WIN32) && defined(USE_WRAPNVML) - int value = device_led[dev_id]; - if(device_led_state[dev_id] != value) - { - if(nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) - device_led_state[dev_id] = value; - } -#endif + int value = device_led[dev_id]; + if (device_led_state[dev_id] != value) { + if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) + device_led_state[dev_id] = value; } +#endif +} - void gpu_led_percent(int dev_id, int percent) - { +void gpu_led_percent(int dev_id, int percent) +{ #if defined(WIN32) && defined(USE_WRAPNVML) - int value = rgb_percent(device_led[dev_id], percent); - if(device_led_state[dev_id] != value) - { - if(nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) - device_led_state[dev_id] = value; - } -#endif + int value = rgb_percent(device_led[dev_id], percent); + if (device_led_state[dev_id] != value) { + if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) + device_led_state[dev_id] = value; } +#endif +} - void gpu_led_off(int dev_id) - { +void gpu_led_off(int dev_id) +{ #if defined(WIN32) && defined(USE_WRAPNVML) - if(device_led_state[dev_id]) - { - if(nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0) - device_led_state[dev_id] = 0; - } -#endif + if (device_led_state[dev_id]) { 
+ if (nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0) + device_led_state[dev_id] = 0; } +#endif +} #ifdef USE_WRAPNVML - extern double thr_hashrates[MAX_GPUS]; - extern bool opt_debug_threads; - extern bool opt_hwmonitor; - extern int num_cpus; +extern double thr_hashrates[MAX_GPUS]; +extern bool opt_debug_threads; +extern bool opt_hwmonitor; +extern int num_cpus; - void *monitor_thread(void *userdata) +void *monitor_thread(void *userdata) +{ + int thr_id = -1; + + while (!abort_flag && !opt_quiet) { - int thr_id = -1; + // This thread monitors card's power lazily during scans, one at a time... + thr_id = (thr_id + 1) % opt_n_threads; + struct cgpu_info *cgpu = &thr_info[thr_id].gpu; + int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id); - while(!abort_flag && !opt_quiet) + if (hnvml != NULL && cgpu) { - // This thread monitors card's power lazily during scans, one at a time... - thr_id = (thr_id + 1) % opt_n_threads; - struct cgpu_info *cgpu = &thr_info[thr_id].gpu; - int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id); - - if(hnvml != NULL && cgpu) - { - char khw[32] = {0}; - uint64_t clock = 0, mem_clock = 0; - uint32_t fanpercent = 0, power = 0; - double tempC = 0, khs_per_watt = 0; - uint32_t counter = 0; - int max_loops = 1000; - - pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock); - - do - { - unsigned int tmp_clock = 0, tmp_memclock = 0; - nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock); + char khw[32] = { 0 }; + uint64_t clock = 0, mem_clock = 0; + uint32_t fanpercent = 0, power = 0; + double tempC = 0, khs_per_watt = 0; + uint32_t counter = 0; + int max_loops = 1000; + + pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock); + + do { + unsigned int tmp_clock=0, tmp_memclock=0; + nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock); #ifdef WIN32 - if(tmp_clock < 200) - { - // workaround for buggy drivers 378.x (real clock) - tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]); - } + if 
(tmp_clock < 200) { + // workaround for buggy drivers 378.x (real clock) + tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]); + } #endif - if(tmp_clock < 200) - { - // some older cards only report a base clock with cuda props. - if(cuda_gpu_info(cgpu) == 0) - { - tmp_clock = cgpu->gpu_clock / 1000; - tmp_memclock = cgpu->gpu_memclock / 1000; - } + if (tmp_clock < 200) { + // some older cards only report a base clock with cuda props. + if (cuda_gpu_info(cgpu) == 0) { + tmp_clock = cgpu->gpu_clock/1000; + tmp_memclock = cgpu->gpu_memclock/1000; } - clock += tmp_clock; - mem_clock += tmp_memclock; - tempC += gpu_temp(cgpu); - fanpercent += gpu_fanpercent(cgpu); - power += gpu_power(cgpu); - counter++; - - usleep(50000); - if(abort_flag) goto abort; - - } while(cgpu->monitor.sampling_flag && (--max_loops)); - - cgpu->monitor.gpu_temp = (uint32_t)(tempC / counter); - cgpu->monitor.gpu_fan = fanpercent / counter; - cgpu->monitor.gpu_power = power / counter; - cgpu->monitor.gpu_clock = (uint32_t)(clock / counter); - cgpu->monitor.gpu_memclock = (uint32_t)(mem_clock / counter); - - if(power) - { - khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]); - khs_per_watt = khs_per_watt / ((double)power / counter); - format_hashrate(khs_per_watt * 1000, khw); - if(strlen(khw)) - sprintf(&khw[strlen(khw) - 1], "W %uW ", cgpu->monitor.gpu_power / 1000); - } - - if(opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) - { - gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%", - cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/, - khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan - ); - cgpu->monitor.tm_displayed = (uint32_t)time(NULL); } + clock += tmp_clock; + mem_clock += tmp_memclock; + tempC += gpu_temp(cgpu); + fanpercent += gpu_fanpercent(cgpu); + power += gpu_power(cgpu); + counter++; + + usleep(50000); + if (abort_flag) goto abort; + + } while (cgpu->monitor.sampling_flag && (--max_loops)); + + cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter); 
+ cgpu->monitor.gpu_fan = fanpercent/counter; + cgpu->monitor.gpu_power = power/counter; + cgpu->monitor.gpu_clock = (uint32_t) (clock/counter); + cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter); + + if (power) { + khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]); + khs_per_watt = khs_per_watt / ((double)power / counter); + format_hashrate(khs_per_watt * 1000, khw); + if (strlen(khw)) + sprintf(&khw[strlen(khw)-1], "W %uW ", cgpu->monitor.gpu_power / 1000); + } - pthread_mutex_unlock(&cgpu->monitor.lock); + if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) { + gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%", + cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/, + khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan + ); + cgpu->monitor.tm_displayed = (uint32_t)time(NULL); } - usleep(500); // safety + + pthread_mutex_unlock(&cgpu->monitor.lock); } -abort: - if(opt_debug_threads) - applog(LOG_DEBUG, "%s() died", __func__); - return NULL; + usleep(500); // safety } -#endif \ No newline at end of file +abort: + if (opt_debug_threads) + applog(LOG_DEBUG, "%s() died", __func__); + return NULL; +} +#endif