diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp index d44b8c0536fe4a..8a3c4409246ad1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp @@ -56,6 +56,7 @@ struct device_info { uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes. uint64_t max_global_mem_size; ///< Maximum size of global device memory in bytes. uint64_t max_alloc_mem_size; ///< Maximum size of memory object allocation in bytes. + uint64_t max_global_cache_size; ///< Maximum size of cache memory bytes. uint64_t max_image2d_width; ///< Maximum image 2d width supported by the device. uint64_t max_image2d_height; ///< Maximum image 2d height supported by the device. diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index f2fa9bcdeeab1b..702e9249b36244 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -786,12 +786,24 @@ uint32_t Plugin::get_optimal_batch_size(const ov::AnyMap& options) const { auto device_id = get_property(ov::device::id.name(), options).as(); auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); - auto next_pow_of_2 = [] (float x) { - return pow(2, ceil(std::log(x)/std::log(2))); - }; + auto closest_pow_of_2 = [] (float x) { - return pow(2, floor(std::log(x)/std::log(2))); + int lower_power = static_cast(floor(std::log(x) / std::log(2))); + double lower_value = pow(2, lower_power); // Current power of 2 + double upper_value = pow(2, lower_power + 1); // Next power of 2 + + // Determine the threshold (70% of the range between lower and upper values) + // If x is within the upper 30% of the range, return the upper power of 2. + double threshold = 0.7 * (upper_value - lower_value); + + // Compare x with the threshold and return the appropriate power of 2 + if (x - lower_value > threshold) { + return upper_value; // Return the next power of 2 + } else { + return lower_value; // Return the current power of 2 + } }; + auto model_param = options.find(ov::hint::model.name()); if (model_param == options.end()) { GPU_DEBUG_INFO << "[OPTIMAL_BATCH_SIZE] ov::hint::model is not set: return 1" << std::endl; @@ -805,31 +817,10 @@ uint32_t Plugin::get_optimal_batch_size(const ov::AnyMap& options) const { } GPU_DEBUG_INFO << "DEVICE_INFO:" << "gfx_version.major, " << device_info.gfx_ver.major - << "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) << std::endl; - static std::map gen_kbytes_per_bank = { - {{12, 0, 0}, 480}, // TGL - {{12, 1, 0}, 2048}, // DG1 - {{12, 5, 0}, 320}, - {{12, 7, 0}, 512}, - }; - size_t L3_cache_size = device_info.gfx_ver.major && (device_info.gfx_ver.major <= 9) - ? 768 * 1024 // Gen9 - : 2 * 768 * 1024; //reasonable default when no arch has been detected (e.g. due to old driver ver) - cldnn::gfx_version gen = {device_info.gfx_ver.major, device_info.gfx_ver.minor, 0 /*ignore the revision*/}; - auto val = gen_kbytes_per_bank.find(gen); - if (gen_kbytes_per_bank.end() != val) { - auto kbytes_per_bank = val->second; - auto num_banks_per_slice = device_info.num_sub_slices_per_slice > 4 - ? next_pow_of_2(device_info.num_sub_slices_per_slice) - : 2 * device_info.num_sub_slices_per_slice; - L3_cache_size = kbytes_per_bank * 1024 * num_banks_per_slice * device_info.num_slices; - GPU_DEBUG_INFO << "DEVICE_INFO:" - << "num_slices " << device_info.num_slices - << ", num_sub_slices_per_slice " << device_info.num_sub_slices_per_slice - << ", num_banks_per_slice " << num_banks_per_slice - << ", gen_kbytes_per_bank : " << kbytes_per_bank - << ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl; - } + << "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) + << "Cache size " << std::to_string(device_info.max_global_cache_size) << std::endl; + + size_t L3_cache_size = device_info.max_global_cache_size; auto config = m_configs_map.at(device_id); auto cloned_model = clone_and_transform_model(model, config, context); ov::MemBandwidthPressure memPressure = ov::mem_bandwidth_pressure_tolerance(cloned_model, L3_cache_size); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 7ab48308cfeaf7..74dbc016c65d31 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -224,6 +224,7 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex info.max_local_mem_size = static_cast(device.getInfo()); info.max_global_mem_size = static_cast(device.getInfo()); info.max_alloc_mem_size = static_cast(device.getInfo()); + info.max_global_cache_size = static_cast(device.getInfo()); info.supports_image = static_cast(device.getInfo()); info.max_image2d_width = static_cast(device.getInfo());