Merge branch 'nvapi' into cuda9

xiaolin1579 · Jan 14, 2018 · d988064 · d988064
2 parents d672784 + ef53486
commit d988064
Show file tree

Hide file tree

Showing 9 changed files with 1,754 additions and 425 deletions.
diff --git a/ccminer.cpp b/ccminer.cpp
@@ -71,6 +71,7 @@ void cuda_devicereset();
 int cuda_finddevice(char *name);
 void cuda_print_devices();
 void cuda_get_device_sm();
+void cuda_reset_device(int thr_id, bool *init);
 
 #include "nvml.h"
 #ifdef USE_WRAPNVML
@@ -90,6 +91,11 @@ struct workio_cmd {
 	} u;
 };
 
+bool opt_debug_diff = false;
+bool opt_debug_threads = false;
+bool opt_showdiff = true;
+bool opt_hwmonitor = true;
+
 static const char *algo_names[] = {
 	"bitcoin",
 	"blake",
@@ -149,21 +155,29 @@ static json_t *opt_config = nullptr;
 static const bool opt_time = true;
 enum sha_algos opt_algo;
 int opt_n_threads = 0;
+int gpu_threads = 1;
 int opt_affinity = -1;
 int opt_priority = 0;
 static double opt_difficulty = 1; // CH
 static bool opt_extranonce = true;
 bool opt_trust_pool = false;
 int num_cpus;
 int active_gpus;
+bool need_nvsettings = false;
+bool need_memclockrst = false;
 char * device_name[MAX_GPUS] = { nullptr };
 int device_map[MAX_GPUS] = { 0 };
 long  device_sm[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = {0};
+int32_t device_mem_offsets[MAX_GPUS] = {0};
 uint32_t device_gpu_clocks[MAX_GPUS] = {0};
 uint32_t device_mem_clocks[MAX_GPUS] = {0};
 uint32_t device_plimit[MAX_GPUS] = {0};
 int8_t device_pstate[MAX_GPUS];
+int32_t device_led[MAX_GPUS] = {-1, -1};
+int opt_led_mode = 0;
+int opt_cudaschedule = -1;
+uint8_t device_tlimit[MAX_GPUS] = {0};
 char *rpc_user = NULL;
 static char *rpc_url = nullptr;
 static char *rpc_userpass = nullptr;
@@ -179,16 +193,17 @@ int longpoll_thr_id = -1;
 int stratum_thr_id = -1;
 int api_thr_id = -1;
 bool stratum_need_reset = false;
+volatile bool abort_flag = false;
 struct work_restart *work_restart = NULL;
 struct stratum_ctx stratum = { 0 };
 bool stop_mining = false;
 volatile bool mining_has_stopped[MAX_GPUS];
 
 pthread_mutex_t applog_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
 uint32_t accepted_count = 0L;
 uint32_t rejected_count = 0L;
-static double thr_hashrates[MAX_GPUS];
+double thr_hashrates[MAX_GPUS];
 uint64_t global_hashrate = 0;
 double   global_diff = 0.0;
 uint64_t net_hashrate = 0;
@@ -291,8 +306,8 @@ Options:\n\
 "\
       --mem-clock=N     Set the gpu memory max clock (346.72+ driver)\n\
       --gpu-clock=N     Set the gpu engine max clock (346.72+ driver)\n\
-      --pstate=N        Set the gpu power state (352.21+ driver)\n\
-			--plimit=N        Set the gpu power limit(352.21 + driver)\n"
+      --pstate=N        (not for 10xx cards) Set the gpu power state (352.21+ driver)\n\
+      --plimit=N        Set the gpu power limit (352.21+ driver)\n"
 #endif
 "";
 
@@ -463,6 +478,26 @@ void proper_exit(int reason)
 #ifdef WIN32
 	timeEndPeriod(1);
 #endif
+#ifdef USE_WRAPNVML
+	if(hnvml)
+	{
+		for(int n = 0; n < opt_n_threads; n++)
+		{
+			nvml_reset_clocks(hnvml, device_map[n]);
+		}
+		nvml_destroy(hnvml);
+	}
+	if(need_memclockrst)
+	{
+#	ifdef WIN32
+		for(int n = 0; n < opt_n_threads; n++)
+		{
+			nvapi_toggle_clocks(n, false);
+		}
+#	endif
+	}
+#endif
+
 	sleep(1);
 	exit(reason);
 }
@@ -2538,7 +2573,11 @@ static void parse_arg(int key, char *arg)
 		while(pch != NULL && n < MAX_GPUS)
 		{
 			int dev_id = device_map[n++];
-			device_mem_clocks[dev_id] = atoi(pch);
+			if(*pch == '+' || *pch == '-')
+				device_mem_offsets[dev_id] = atoi(pch);
+			else
+				device_mem_clocks[dev_id] = atoi(pch);
+			need_nvsettings = true;
 			pch = strtok(NULL, ",");
 		}
 	}
@@ -2804,28 +2843,6 @@ int main(int argc, char *argv[])
 
 	cuda_devicenames(); 
 
-#ifdef USE_WRAPNVML
-#if defined(__linux__) || defined(_WIN64)
-	/* nvml is currently not the best choice on Windows (only in x64) */
-	hnvml = nvml_create();
-	if(hnvml)
-	{
-		bool gpu_reinit = false;// (opt_cudaschedule >= 0);
-		cuda_devicenames(); // refresh gpu vendor name
-		applog(LOG_INFO, "NVML GPU monitoring enabled.");
-	}
-#endif
-#ifdef WIN32
-	if(!hnvml && nvapi_init() == 0)
-	{
-		applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
-		cuda_devicenames(); // refresh gpu vendor name
-	}
-#endif
-	else if(!hnvml)
-		applog(LOG_INFO, "GPU monitoring is not available.");
-#endif
-
 	if(opt_protocol)
 	{
 		curl_version_info_data *info;
@@ -3025,26 +3042,63 @@ int main(int argc, char *argv[])
 			tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	}
 
+
+#ifdef __linux__
+	if(need_nvsettings)
+	{
+		if(nvs_init() < 0)
+			need_nvsettings = false;
+	}
+#endif
+
 #ifdef USE_WRAPNVML
 #if defined(__linux__) || defined(_WIN64)
 	/* nvml is currently not the best choice on Windows (only in x64) */
-	if (hnvml) {
-		bool gpu_reinit = false;// (opt_cudaschedule >= 0);
+	hnvml = nvml_create();
+	if(hnvml)
+	{
+		bool gpu_reinit = (opt_cudaschedule >= 0); //false
+		cuda_devicenames(); // refresh gpu vendor name
+		if(!opt_quiet)
+			applog(LOG_INFO, "NVML GPU monitoring enabled.");
 		for(int n = 0; n < active_gpus; n++)
 		{
 			if(nvml_set_pstate(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if(nvml_set_plimit(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
-			if(nvml_set_clocks(hnvml, device_map[n]) == 1)
+			if(!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if(gpu_reinit)
 			{
-//				cuda_reset_device(n, NULL);
+				cuda_reset_device(n, NULL);
 			}
 		}
 	}
 #endif
+#ifdef WIN32
+	if(nvapi_init() == 0)
+	{
+		if(!opt_quiet)
+			applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
+		if(!hnvml)
+		{
+			cuda_devicenames(); // refresh gpu vendor name
+		}
+		nvapi_init_settings();
+	}
+#endif
+	else if(!hnvml && !opt_quiet)
+		applog(LOG_INFO, "GPU monitoring is not available.");
+
+	// force reinit to set default device flags
+	if(opt_cudaschedule >= 0 && !hnvml)
+	{
+		for(int n = 0; n < active_gpus; n++)
+		{
+			cuda_reset_device(n, NULL);
+		}
+	}
 #endif
 
 	if(opt_api_listen)

diff --git a/ccminer.vcxproj b/ccminer.vcxproj
@@ -285,6 +285,7 @@
     </CudaCompile>
     <CudaCompile Include="Sia\sia.cu" />
     <ClCompile Include="nvapi.cpp" />
+    <ClCompile Include="nvsettings.cpp" />
     <ClCompile Include="sph\neoscrypt.cpp" />
     <ClCompile Include="sph\sha256_Y.c" />
     <ClCompile Include="sph\sph_sha2.c" />

diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
@@ -207,6 +207,9 @@
     <ClCompile Include="nvapi.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="nvsettings.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="compat.h">

diff --git a/cuda.cpp b/cuda.cpp
@@ -281,3 +281,11 @@ double throughput2intensity(uint32_t throughput)
 	}
 	return intensity;
 }
+
+void cuda_reset_device(int thr_id, bool *init)
+{
+	int dev_id = device_map[thr_id];
+	cudaSetDevice(dev_id);
+	cudaDeviceReset();
+	cudaDeviceSynchronize();
+}
diff --git a/miner.h b/miner.h
@@ -75,16 +75,19 @@ void *alloca (size_t);
 
 #ifdef HAVE_SYSLOG_H
 #include <syslog.h>
-#define LOG_BLUE 0x10 /* unique value */
+#define LOG_BLUE 0x10
+#define LOG_RAW  0x99
 #else
-enum {
+enum
+{
 	LOG_ERR,
 	LOG_WARNING,
 	LOG_NOTICE,
 	LOG_INFO,
 	LOG_DEBUG,
 	/* custom notices */
 	LOG_BLUE = 0x10,
+	LOG_RAW = 0x99
 };
 #endif
 
@@ -481,6 +484,7 @@ struct thr_info {
 extern int cuda_num_devices();
 extern int cuda_version();
 extern int cuda_gpu_clocks(struct cgpu_info *gpu);
+int cuda_gpu_info(struct cgpu_info *gpu);
 extern bool opt_verify;
 extern bool opt_benchmark;
 extern bool opt_debug;
@@ -507,16 +511,18 @@ extern int longpoll_thr_id;
 extern int stratum_thr_id;
 extern int api_thr_id;
 extern bool opt_trust_pool;
-
+extern volatile bool abort_flag;
 extern uint64_t global_hashrate;
 extern double   global_diff;
 
 #define MAX_GPUS 16
 extern char* device_name[MAX_GPUS];
 extern int device_map[MAX_GPUS];
 extern long  device_sm[MAX_GPUS];
+extern uint32_t device_plimit[MAX_GPUS];
 extern uint32_t gpus_intensity[MAX_GPUS];
 double throughput2intensity(uint32_t throughput);
+extern void gpulog(int prio, int thr_id, const char *fmt, ...);
 
 #define CL_N    "\x1B[0m"
 #define CL_RED  "\x1B[31m"