summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c1061
1 files changed, 620 insertions, 441 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 19ce59028d6b..c6a76090a725 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -28,27 +28,32 @@
#include <linux/hash.h>
#include <linux/cpufreq.h>
#include <linux/log2.h>
+#include <linux/dmi.h>
+#include <linux/atomic.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
#include "kfd_topology.h"
+#include "kfd_device_queue_manager.h"
+/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
-static int topology_crat_parsed;
static struct kfd_system_properties sys_props;
static DECLARE_RWSEM(topology_lock);
+static atomic_t topology_crat_proximity_domain;
-struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+ uint32_t proximity_domain)
{
struct kfd_topology_device *top_dev;
- struct kfd_dev *device = NULL;
+ struct kfd_topology_device *device = NULL;
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu_id == gpu_id) {
- device = top_dev->gpu;
+ if (top_dev->proximity_domain == proximity_domain) {
+ device = top_dev;
break;
}
@@ -57,7 +62,7 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
return device;
}
-struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
+struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
{
struct kfd_topology_device *top_dev;
struct kfd_dev *device = NULL;
@@ -65,7 +70,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu->pdev == pdev) {
+ if (top_dev->gpu_id == gpu_id) {
device = top_dev->gpu;
break;
}
@@ -75,282 +80,31 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
return device;
}
-static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
-{
- struct acpi_table_header *crat_table;
- acpi_status status;
-
- if (!size)
- return -EINVAL;
-
- /*
- * Fetch the CRAT table from ACPI
- */
- status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
- if (status == AE_NOT_FOUND) {
- pr_warn("CRAT table not found\n");
- return -ENODATA;
- } else if (ACPI_FAILURE(status)) {
- const char *err = acpi_format_exception(status);
-
- pr_err("CRAT table error: %s\n", err);
- return -EINVAL;
- }
-
- if (*size >= crat_table->length && crat_image != NULL)
- memcpy(crat_image, crat_table, crat_table->length);
-
- *size = crat_table->length;
-
- return 0;
-}
-
-static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
- struct crat_subtype_computeunit *cu)
-{
- dev->node_props.cpu_cores_count = cu->num_cpu_cores;
- dev->node_props.cpu_core_id_base = cu->processor_id_low;
- if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
- dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
-
- pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
- cu->processor_id_low);
-}
-
-static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
- struct crat_subtype_computeunit *cu)
-{
- dev->node_props.simd_id_base = cu->processor_id_low;
- dev->node_props.simd_count = cu->num_simd_cores;
- dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
- dev->node_props.max_waves_per_simd = cu->max_waves_simd;
- dev->node_props.wave_front_size = cu->wave_front_size;
- dev->node_props.mem_banks_count = cu->num_banks;
- dev->node_props.array_count = cu->num_arrays;
- dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
- dev->node_props.simd_per_cu = cu->num_simd_per_cu;
- dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
- if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
- dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
- pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores,
- cu->processor_id_low);
-}
-
-/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */
-static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
-{
- struct kfd_topology_device *dev;
- int i = 0;
-
- pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
- cu->proximity_domain, cu->hsa_capability);
- list_for_each_entry(dev, &topology_device_list, list) {
- if (cu->proximity_domain == i) {
- if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
- kfd_populated_cu_info_cpu(dev, cu);
-
- if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
- kfd_populated_cu_info_gpu(dev, cu);
- break;
- }
- i++;
- }
-
- return 0;
-}
-
-/*
- * kfd_parse_subtype_mem is called when the topology mutex is
- * already acquired
- */
-static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
-{
- struct kfd_mem_properties *props;
- struct kfd_topology_device *dev;
- int i = 0;
-
- pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
- mem->promixity_domain);
- list_for_each_entry(dev, &topology_device_list, list) {
- if (mem->promixity_domain == i) {
- props = kfd_alloc_struct(props);
- if (props == NULL)
- return -ENOMEM;
-
- if (dev->node_props.cpu_cores_count == 0)
- props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE;
- else
- props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
-
- if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
- props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
- if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
- props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
-
- props->size_in_bytes =
- ((uint64_t)mem->length_high << 32) +
- mem->length_low;
- props->width = mem->width;
-
- dev->mem_bank_count++;
- list_add_tail(&props->list, &dev->mem_props);
-
- break;
- }
- i++;
- }
-
- return 0;
-}
-
-/*
- * kfd_parse_subtype_cache is called when the topology mutex
- * is already acquired
- */
-static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
-{
- struct kfd_cache_properties *props;
- struct kfd_topology_device *dev;
- uint32_t id;
-
- id = cache->processor_id_low;
-
- pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
- list_for_each_entry(dev, &topology_device_list, list)
- if (id == dev->node_props.cpu_core_id_base ||
- id == dev->node_props.simd_id_base) {
- props = kfd_alloc_struct(props);
- if (props == NULL)
- return -ENOMEM;
-
- props->processor_id_low = id;
- props->cache_level = cache->cache_level;
- props->cache_size = cache->cache_size;
- props->cacheline_size = cache->cache_line_size;
- props->cachelines_per_tag = cache->lines_per_tag;
- props->cache_assoc = cache->associativity;
- props->cache_latency = cache->cache_latency;
-
- if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_DATA;
- if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
- if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_CPU;
- if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_HSACU;
-
- dev->cache_count++;
- dev->node_props.caches_count++;
- list_add_tail(&props->list, &dev->cache_props);
-
- break;
- }
-
- return 0;
-}
-
-/*
- * kfd_parse_subtype_iolink is called when the topology mutex
- * is already acquired
- */
-static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
+struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
{
- struct kfd_iolink_properties *props;
- struct kfd_topology_device *dev;
- uint32_t i = 0;
- uint32_t id_from;
- uint32_t id_to;
-
- id_from = iolink->proximity_domain_from;
- id_to = iolink->proximity_domain_to;
+ struct kfd_topology_device *top_dev;
+ struct kfd_dev *device = NULL;
- pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from);
- list_for_each_entry(dev, &topology_device_list, list) {
- if (id_from == i) {
- props = kfd_alloc_struct(props);
- if (props == NULL)
- return -ENOMEM;
-
- props->node_from = id_from;
- props->node_to = id_to;
- props->ver_maj = iolink->version_major;
- props->ver_min = iolink->version_minor;
-
- /*
- * weight factor (derived from CDIR), currently always 1
- */
- props->weight = 1;
-
- props->min_latency = iolink->minimum_latency;
- props->max_latency = iolink->maximum_latency;
- props->min_bandwidth = iolink->minimum_bandwidth_mbs;
- props->max_bandwidth = iolink->maximum_bandwidth_mbs;
- props->rec_transfer_size =
- iolink->recommended_transfer_size;
-
- dev->io_link_count++;
- dev->node_props.io_links_count++;
- list_add_tail(&props->list, &dev->io_link_props);
+ down_read(&topology_lock);
+ list_for_each_entry(top_dev, &topology_device_list, list)
+ if (top_dev->gpu->pdev == pdev) {
+ device = top_dev->gpu;
break;
}
- i++;
- }
- return 0;
-}
-
-static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
-{
- struct crat_subtype_computeunit *cu;
- struct crat_subtype_memory *mem;
- struct crat_subtype_cache *cache;
- struct crat_subtype_iolink *iolink;
- int ret = 0;
-
- switch (sub_type_hdr->type) {
- case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
- cu = (struct crat_subtype_computeunit *)sub_type_hdr;
- ret = kfd_parse_subtype_cu(cu);
- break;
- case CRAT_SUBTYPE_MEMORY_AFFINITY:
- mem = (struct crat_subtype_memory *)sub_type_hdr;
- ret = kfd_parse_subtype_mem(mem);
- break;
- case CRAT_SUBTYPE_CACHE_AFFINITY:
- cache = (struct crat_subtype_cache *)sub_type_hdr;
- ret = kfd_parse_subtype_cache(cache);
- break;
- case CRAT_SUBTYPE_TLB_AFFINITY:
- /*
- * For now, nothing to do here
- */
- pr_info("Found TLB entry in CRAT table (not processing)\n");
- break;
- case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
- /*
- * For now, nothing to do here
- */
- pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n");
- break;
- case CRAT_SUBTYPE_IOLINK_AFFINITY:
- iolink = (struct crat_subtype_iolink *)sub_type_hdr;
- ret = kfd_parse_subtype_iolink(iolink);
- break;
- default:
- pr_warn("Unknown subtype (%d) in CRAT\n",
- sub_type_hdr->type);
- }
+ up_read(&topology_lock);
- return ret;
+ return device;
}
+/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_perf_properties *perf;
list_del(&dev->list);
@@ -375,25 +129,35 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
kfree(iolink);
}
- kfree(dev);
+ while (dev->perf_props.next != &dev->perf_props) {
+ perf = container_of(dev->perf_props.next,
+ struct kfd_perf_properties, list);
+ list_del(&perf->list);
+ kfree(perf);
+ }
- sys_props.num_devices--;
+ kfree(dev);
}
-static void kfd_release_live_view(void)
+void kfd_release_topology_device_list(struct list_head *device_list)
{
struct kfd_topology_device *dev;
- while (topology_device_list.next != &topology_device_list) {
- dev = container_of(topology_device_list.next,
- struct kfd_topology_device, list);
+ while (!list_empty(device_list)) {
+ dev = list_first_entry(device_list,
+ struct kfd_topology_device, list);
kfd_release_topology_device(dev);
+ }
}
+static void kfd_release_live_view(void)
+{
+ kfd_release_topology_device_list(&topology_device_list);
memset(&sys_props, 0, sizeof(sys_props));
}
-static struct kfd_topology_device *kfd_create_topology_device(void)
+struct kfd_topology_device *kfd_create_topology_device(
+ struct list_head *device_list)
{
struct kfd_topology_device *dev;
@@ -406,65 +170,13 @@ static struct kfd_topology_device *kfd_create_topology_device(void)
INIT_LIST_HEAD(&dev->mem_props);
INIT_LIST_HEAD(&dev->cache_props);
INIT_LIST_HEAD(&dev->io_link_props);
+ INIT_LIST_HEAD(&dev->perf_props);
- list_add_tail(&dev->list, &topology_device_list);
- sys_props.num_devices++;
+ list_add_tail(&dev->list, device_list);
return dev;
}
-static int kfd_parse_crat_table(void *crat_image)
-{
- struct kfd_topology_device *top_dev;
- struct crat_subtype_generic *sub_type_hdr;
- uint16_t node_id;
- int ret;
- struct crat_header *crat_table = (struct crat_header *)crat_image;
- uint16_t num_nodes;
- uint32_t image_len;
-
- if (!crat_image)
- return -EINVAL;
-
- num_nodes = crat_table->num_domains;
- image_len = crat_table->length;
-
- pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
-
- for (node_id = 0; node_id < num_nodes; node_id++) {
- top_dev = kfd_create_topology_device();
- if (!top_dev) {
- kfd_release_live_view();
- return -ENOMEM;
- }
- }
-
- sys_props.platform_id =
- (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK;
- sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id);
- sys_props.platform_rev = crat_table->revision;
-
- sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
- while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
- ((char *)crat_image) + image_len) {
- if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
- ret = kfd_parse_subtype(sub_type_hdr);
- if (ret != 0) {
- kfd_release_live_view();
- return ret;
- }
- }
-
- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
- sub_type_hdr->length);
- }
-
- sys_props.generation_count++;
- topology_crat_parsed = 1;
-
- return 0;
-}
-
#define sysfs_show_gen_prop(buffer, fmt, ...) \
snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
@@ -501,11 +213,17 @@ static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
return ret;
}
+static void kfd_topology_kobj_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
static const struct sysfs_ops sysprops_ops = {
.show = sysprops_show,
};
static struct kobj_type sysprops_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &sysprops_ops,
};
@@ -541,6 +259,7 @@ static const struct sysfs_ops iolink_ops = {
};
static struct kobj_type iolink_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &iolink_ops,
};
@@ -568,6 +287,7 @@ static const struct sysfs_ops mem_ops = {
};
static struct kobj_type mem_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &mem_ops,
};
@@ -575,7 +295,7 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
ssize_t ret;
- uint32_t i;
+ uint32_t i, j;
struct kfd_cache_properties *cache;
/* Making sure that the buffer is an empty string */
@@ -593,12 +313,18 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
- for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++)
- ret = snprintf(buffer, PAGE_SIZE, "%s%d%s",
- buffer, cache->sibling_map[i],
- (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ?
- "\n" : ",");
-
+ for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+ for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
+ /* Check each bit */
+ if (cache->sibling_map[i] & (1 << j))
+ ret = snprintf(buffer, PAGE_SIZE,
+ "%s%d%s", buffer, 1, ",");
+ else
+ ret = snprintf(buffer, PAGE_SIZE,
+ "%s%d%s", buffer, 0, ",");
+ }
+ /* Replace the last "," with end of line */
+ *(buffer + strlen(buffer) - 1) = 0xA;
return ret;
}
@@ -607,9 +333,43 @@ static const struct sysfs_ops cache_ops = {
};
static struct kobj_type cache_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &cache_ops,
};
+/****** Sysfs of Performance Counters ******/
+
+struct kfd_perf_attr {
+ struct kobj_attribute attr;
+ uint32_t data;
+};
+
+static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
+ char *buf)
+{
+ struct kfd_perf_attr *attr;
+
+ buf[0] = 0;
+ attr = container_of(attrs, struct kfd_perf_attr, attr);
+ if (!attr->data) /* invalid data for PMC */
+ return 0;
+ else
+ return sysfs_show_32bit_val(buf, attr->data);
+}
+
+#define KFD_PERF_DESC(_name, _data) \
+{ \
+ .attr = __ATTR(_name, 0444, perf_show, NULL), \
+ .data = _data, \
+}
+
+static struct kfd_perf_attr perf_attr_iommu[] = {
+ KFD_PERF_DESC(max_concurrent, 0),
+ KFD_PERF_DESC(num_counters, 0),
+ KFD_PERF_DESC(counter_ids, 0),
+};
+/****************************************/
+
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
@@ -646,18 +406,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, "simd_count",
dev->node_props.simd_count);
-
- if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
- pr_info_once("mem_banks_count truncated from %d to %d\n",
- dev->node_props.mem_banks_count,
- dev->mem_bank_count);
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->mem_bank_count);
- } else {
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->node_props.mem_banks_count);
- }
-
+ sysfs_show_32bit_prop(buffer, "mem_banks_count",
+ dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, "caches_count",
dev->node_props.caches_count);
sysfs_show_32bit_prop(buffer, "io_links_count",
@@ -705,9 +455,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
}
+ if (dev->gpu->device_info->asic_family == CHIP_TONGA)
+ dev->node_props.capability |=
+ HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
- dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
- dev->gpu->kgd));
+ dev->node_props.max_engine_clk_fcompute);
sysfs_show_64bit_prop(buffer, "local_mem_size",
(unsigned long long int) 0);
@@ -729,6 +482,7 @@ static const struct sysfs_ops node_ops = {
};
static struct kobj_type node_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &node_ops,
};
@@ -744,6 +498,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
+ struct kfd_perf_properties *perf;
if (dev->kobj_iolink) {
list_for_each_entry(iolink, &dev->io_link_props, list)
@@ -780,6 +535,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
dev->kobj_mem = NULL;
}
+ if (dev->kobj_perf) {
+ list_for_each_entry(perf, &dev->perf_props, list) {
+ kfree(perf->attr_group);
+ perf->attr_group = NULL;
+ }
+ kobject_del(dev->kobj_perf);
+ kobject_put(dev->kobj_perf);
+ dev->kobj_perf = NULL;
+ }
+
if (dev->kobj_node) {
sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
sysfs_remove_file(dev->kobj_node, &dev->attr_name);
@@ -796,8 +561,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
+ struct kfd_perf_properties *perf;
int ret;
- uint32_t i;
+ uint32_t i, num_attrs;
+ struct attribute **attrs;
if (WARN_ON(dev->kobj_node))
return -EEXIST;
@@ -826,6 +593,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (!dev->kobj_iolink)
return -ENOMEM;
+ dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
+ if (!dev->kobj_perf)
+ return -ENOMEM;
+
/*
* Creating sysfs files for node properties
*/
@@ -903,11 +674,38 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (ret < 0)
return ret;
i++;
-}
+ }
+
+ /* All hardware blocks have the same number of attributes. */
+ num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
+ list_for_each_entry(perf, &dev->perf_props, list) {
+ perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
+ * num_attrs + sizeof(struct attribute_group),
+ GFP_KERNEL);
+ if (!perf->attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(perf->attr_group + 1);
+ if (!strcmp(perf->block_name, "iommu")) {
+ /* Information of IOMMU's num_counters and counter_ids is shown
+ * under /sys/bus/event_source/devices/amd_iommu. We don't
+ * duplicate here.
+ */
+ perf_attr_iommu[0].data = perf->max_concurrent;
+ for (i = 0; i < num_attrs; i++)
+ attrs[i] = &perf_attr_iommu[i].attr.attr;
+ }
+ perf->attr_group->name = perf->block_name;
+ perf->attr_group->attrs = attrs;
+ ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
+/* Called with write topology lock acquired */
static int kfd_build_sysfs_node_tree(void)
{
struct kfd_topology_device *dev;
@@ -924,6 +722,7 @@ static int kfd_build_sysfs_node_tree(void)
return 0;
}
+/* Called with write topology lock acquired */
static void kfd_remove_sysfs_node_tree(void)
{
struct kfd_topology_device *dev;
@@ -995,75 +794,246 @@ static void kfd_topology_release_sysfs(void)
}
}
+/* Called with write topology_lock acquired */
+static void kfd_topology_update_device_list(struct list_head *temp_list,
+ struct list_head *master_list)
+{
+ while (!list_empty(temp_list)) {
+ list_move_tail(temp_list->next, master_list);
+ sys_props.num_devices++;
+ }
+}
+
+static void kfd_debug_print_topology(void)
+{
+ struct kfd_topology_device *dev;
+
+ down_read(&topology_lock);
+
+ dev = list_last_entry(&topology_device_list,
+ struct kfd_topology_device, list);
+ if (dev) {
+ if (dev->node_props.cpu_cores_count &&
+ dev->node_props.simd_count) {
+ pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
+ dev->node_props.device_id,
+ dev->node_props.vendor_id);
+ } else if (dev->node_props.cpu_cores_count)
+ pr_info("Topology: Add CPU node\n");
+ else if (dev->node_props.simd_count)
+ pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
+ dev->node_props.device_id,
+ dev->node_props.vendor_id);
+ }
+ up_read(&topology_lock);
+}
+
+/* Helper function for intializing platform_xx members of
+ * kfd_system_properties. Uses OEM info from the last CPU/APU node.
+ */
+static void kfd_update_system_properties(void)
+{
+ struct kfd_topology_device *dev;
+
+ down_read(&topology_lock);
+ dev = list_last_entry(&topology_device_list,
+ struct kfd_topology_device, list);
+ if (dev) {
+ sys_props.platform_id =
+ (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+ sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
+ sys_props.platform_rev = dev->oem_revision;
+ }
+ up_read(&topology_lock);
+}
+
+static void find_system_memory(const struct dmi_header *dm,
+ void *private)
+{
+ struct kfd_mem_properties *mem;
+ u16 mem_width, mem_clock;
+ struct kfd_topology_device *kdev =
+ (struct kfd_topology_device *)private;
+ const u8 *dmi_data = (const u8 *)(dm + 1);
+
+ if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
+ mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
+ mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
+ list_for_each_entry(mem, &kdev->mem_props, list) {
+ if (mem_width != 0xFFFF && mem_width != 0)
+ mem->width = mem_width;
+ if (mem_clock != 0)
+ mem->mem_clk_max = mem_clock;
+ }
+ }
+}
+
+/*
+ * Performance counters information is not part of CRAT but we would like to
+ * put them in the sysfs under topology directory for Thunk to get the data.
+ * This function is called before updating the sysfs.
+ */
+static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
+{
+ struct kfd_perf_properties *props;
+
+ if (amd_iommu_pc_supported()) {
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+ strcpy(props->block_name, "iommu");
+ props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
+ amd_iommu_pc_get_max_counters(0); /* assume one iommu */
+ list_add_tail(&props->list, &kdev->perf_props);
+ }
+
+ return 0;
+}
+
+/* kfd_add_non_crat_information - Add information that is not currently
+ * defined in CRAT but is necessary for KFD topology
+ * @dev - topology device to which addition info is added
+ */
+static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
+{
+ /* Check if CPU only node. */
+ if (!kdev->gpu) {
+ /* Add system memory information */
+ dmi_walk(find_system_memory, kdev);
+ }
+ /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
+}
+
+/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
+ * Ignore CRAT for all other devices. AMD APU is identified if both CPU
+ * and GPU cores are present.
+ * @device_list - topology device list created by parsing ACPI CRAT table.
+ * @return - TRUE if invalid, FALSE is valid.
+ */
+static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
+{
+ struct kfd_topology_device *dev;
+
+ list_for_each_entry(dev, device_list, list) {
+ if (dev->node_props.cpu_cores_count &&
+ dev->node_props.simd_count)
+ return false;
+ }
+ pr_info("Ignoring ACPI CRAT on non-APU system\n");
+ return true;
+}
+
int kfd_topology_init(void)
{
void *crat_image = NULL;
size_t image_size = 0;
int ret;
-
- /*
- * Initialize the head for the topology device list
+ struct list_head temp_topology_device_list;
+ int cpu_only_node = 0;
+ struct kfd_topology_device *kdev;
+ int proximity_domain;
+
+ /* topology_device_list - Master list of all topology devices
+ * temp_topology_device_list - temporary list created while parsing CRAT
+ * or VCRAT. Once parsing is complete the contents of list is moved to
+ * topology_device_list
*/
+
+ /* Initialize the head for the both the lists */
INIT_LIST_HEAD(&topology_device_list);
+ INIT_LIST_HEAD(&temp_topology_device_list);
init_rwsem(&topology_lock);
- topology_crat_parsed = 0;
memset(&sys_props, 0, sizeof(sys_props));
+ /* Proximity domains in ACPI CRAT tables start counting at
+ * 0. The same should be true for virtual CRAT tables created
+ * at this stage. GPUs added later in kfd_topology_add_device
+ * use a counter.
+ */
+ proximity_domain = 0;
+
/*
- * Get the CRAT image from the ACPI
+ * Get the CRAT image from the ACPI. If ACPI doesn't have one
+ * or if ACPI CRAT is invalid create a virtual CRAT.
+ * NOTE: The current implementation expects all AMD APUs to have
+ * CRAT. If no CRAT is available, it is assumed to be a CPU
*/
- ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
- if (ret == 0 && image_size > 0) {
- pr_info("Found CRAT image with size=%zd\n", image_size);
- crat_image = kmalloc(image_size, GFP_KERNEL);
- if (!crat_image) {
- ret = -ENOMEM;
- pr_err("No memory for allocating CRAT image\n");
- goto err;
+ ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
+ if (!ret) {
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret ||
+ kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
+ kfd_release_topology_device_list(
+ &temp_topology_device_list);
+ kfd_destroy_crat_image(crat_image);
+ crat_image = NULL;
}
- ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
-
- if (ret == 0) {
- down_write(&topology_lock);
- ret = kfd_parse_crat_table(crat_image);
- if (ret == 0)
- ret = kfd_topology_update_sysfs();
- up_write(&topology_lock);
- } else {
- pr_err("Couldn't get CRAT table size from ACPI\n");
+ }
+
+ if (!crat_image) {
+ ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_CPU, NULL,
+ proximity_domain);
+ cpu_only_node = 1;
+ if (ret) {
+ pr_err("Error creating VCRAT table for CPU\n");
+ return ret;
}
- kfree(crat_image);
- } else if (ret == -ENODATA) {
- ret = 0;
- } else {
- pr_err("Couldn't get CRAT table size from ACPI\n");
+
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret) {
+ pr_err("Error parsing VCRAT table for CPU\n");
+ goto err;
+ }
+ }
+
+ kdev = list_first_entry(&temp_topology_device_list,
+ struct kfd_topology_device, list);
+ kfd_add_perf_to_topology(kdev);
+
+ down_write(&topology_lock);
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
+ atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
+ ret = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+
+ if (!ret) {
+ sys_props.generation_count++;
+ kfd_update_system_properties();
+ kfd_debug_print_topology();
+ pr_info("Finished initializing topology\n");
+ } else
+ pr_err("Failed to update topology in sysfs ret=%d\n", ret);
+
+ /* For nodes with GPU, this information gets added
+ * when GPU is detected (kfd_topology_add_device).
+ */
+ if (cpu_only_node) {
+ /* Add additional information to CPU only node created above */
+ down_write(&topology_lock);
+ kdev = list_first_entry(&topology_device_list,
+ struct kfd_topology_device, list);
+ up_write(&topology_lock);
+ kfd_add_non_crat_information(kdev);
}
err:
- pr_info("Finished initializing topology ret=%d\n", ret);
+ kfd_destroy_crat_image(crat_image);
return ret;
}
void kfd_topology_shutdown(void)
{
+ down_write(&topology_lock);
kfd_topology_release_sysfs();
kfd_release_live_view();
-}
-
-static void kfd_debug_print_topology(void)
-{
- struct kfd_topology_device *dev;
- uint32_t i = 0;
-
- pr_info("DEBUG PRINT OF TOPOLOGY:");
- list_for_each_entry(dev, &topology_device_list, list) {
- pr_info("Node: %d\n", i);
- pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no"));
- pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count);
- pr_info("\tSIMD count: %d", dev->node_props.simd_count);
- i++;
- }
+ up_write(&topology_lock);
}
static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
@@ -1072,11 +1042,15 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
uint32_t buf[7];
uint64_t local_mem_size;
int i;
+ struct kfd_local_mem_info local_mem_info;
if (!gpu)
return 0;
- local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd);
+ gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
+
+ local_mem_size = local_mem_info.local_mem_size_private +
+ local_mem_info.local_mem_size_public;
buf[0] = gpu->pdev->devfn;
buf[1] = gpu->pdev->subsystem_vendor;
@@ -1091,19 +1065,26 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
return hashout;
}
-
+/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
+ * the GPU device is not already present in the topology device
+ * list then return NULL. This means a new topology device has to
+ * be created for this GPU.
+ * TODO: Rather than assiging @gpu to first topology device withtout
+ * gpu attached, it will better to have more stringent check.
+ */
static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
{
struct kfd_topology_device *dev;
struct kfd_topology_device *out_dev = NULL;
+ down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list)
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
dev->gpu = gpu;
out_dev = dev;
break;
}
-
+ up_write(&topology_lock);
return out_dev;
}
@@ -1115,84 +1096,196 @@ static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
*/
}
+/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
+ * patch this after CRAT parsing.
+ */
+static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
+{
+ struct kfd_mem_properties *mem;
+ struct kfd_local_mem_info local_mem_info;
+
+ if (!dev)
+ return;
+
+ /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
+ * single bank of VRAM local memory.
+ * for dGPUs - VCRAT reports only one bank of Local Memory
+ * for APUs - If CRAT from ACPI reports more than one bank, then
+ * all the banks will report the same mem_clk_max information
+ */
+ dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
+ &local_mem_info);
+
+ list_for_each_entry(mem, &dev->mem_props, list)
+ mem->mem_clk_max = local_mem_info.mem_clk_max;
+}
+
+static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+{
+ struct kfd_iolink_properties *link;
+
+ if (!dev || !dev->gpu)
+ return;
+
+ /* GPU only creates direck links so apply flags setting to all */
+ if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
+ list_for_each_entry(link, &dev->io_link_props, list)
+ link->flags = CRAT_IOLINK_FLAGS_ENABLED |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+}
+
int kfd_topology_add_device(struct kfd_dev *gpu)
{
uint32_t gpu_id;
struct kfd_topology_device *dev;
- int res;
+ struct kfd_cu_info cu_info;
+ int res = 0;
+ struct list_head temp_topology_device_list;
+ void *crat_image = NULL;
+ size_t image_size = 0;
+ int proximity_domain;
+
+ INIT_LIST_HEAD(&temp_topology_device_list);
gpu_id = kfd_generate_gpu_id(gpu);
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
- down_write(&topology_lock);
- /*
- * Try to assign the GPU to existing topology device (generated from
- * CRAT table
+ proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
+
+ /* Check to see if this gpu device exists in the topology_device_list.
+ * If so, assign the gpu to that device,
+ * else create a Virtual CRAT for this gpu device and then parse that
+ * CRAT to create a new topology device. Once created assign the gpu to
+ * that topology device
*/
dev = kfd_assign_gpu(gpu);
if (!dev) {
- pr_info("GPU was not found in the current topology. Extending.\n");
- kfd_debug_print_topology();
- dev = kfd_create_topology_device();
- if (!dev) {
- res = -ENOMEM;
+ res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_GPU, gpu,
+ proximity_domain);
+ if (res) {
+ pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
+ return res;
+ }
+ res = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (res) {
+ pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
goto err;
}
- dev->gpu = gpu;
- /*
- * TODO: Make a call to retrieve topology information from the
- * GPU vBIOS
- */
+ down_write(&topology_lock);
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
/* Update the SYSFS tree, since we added another topology
* device
*/
- if (kfd_topology_update_sysfs() < 0)
- kfd_topology_release_sysfs();
-
+ res = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+
+ if (!res)
+ sys_props.generation_count++;
+ else
+ pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+ gpu_id, res);
+ dev = kfd_assign_gpu(gpu);
+ if (WARN_ON(!dev)) {
+ res = -ENODEV;
+ goto err;
+ }
}
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
+
+ /* TODO: Move the following lines to function
+ * kfd_add_non_crat_information
+ */
+
+ /* Fill-in additional information that is not available in CRAT but
+ * needed for the topology
+ */
+
+ dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
+ dev->node_props.simd_arrays_per_engine =
+ cu_info.num_shader_arrays_per_engine;
+
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
- dev->node_props.location_id = (gpu->pdev->bus->number << 24) +
- (gpu->pdev->devfn & 0xffffff);
- /*
- * TODO: Retrieve max engine clock values from KGD
- */
+ dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
+ gpu->pdev->devfn);
+ dev->node_props.max_engine_clk_fcompute =
+ dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
+ dev->node_props.max_engine_clk_ccompute =
+ cpufreq_quick_get_max(0) / 1000;
+
+ kfd_fill_mem_clk_max_info(dev);
+ kfd_fill_iolink_non_crat_info(dev);
+
+ switch (dev->gpu->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ case CHIP_TONGA:
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ pr_debug("Adding doorbell packet type capability\n");
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dev->gpu->device_info->asic_family);
+ }
+ /* Fix errors in CZ CRAT.
+ * simd_count: Carrizo CRAT reports wrong simd_count, probably
+ * because it doesn't consider masked out CUs
+ * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
+ * capability flag: Carrizo CRAT doesn't report IOMMU flags
+ */
if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
- dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
- pr_info("Adding doorbell packet type capability\n");
+ dev->node_props.simd_count =
+ cu_info.simd_per_cu * cu_info.cu_active_number;
+ dev->node_props.max_waves_per_simd = 10;
+ dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
}
- res = 0;
-
-err:
- up_write(&topology_lock);
+ kfd_debug_print_topology();
- if (res == 0)
+ if (!res)
kfd_notify_gpu_change(gpu_id, 1);
-
+err:
+ kfd_destroy_crat_image(crat_image);
return res;
}
int kfd_topology_remove_device(struct kfd_dev *gpu)
{
- struct kfd_topology_device *dev;
+ struct kfd_topology_device *dev, *tmp;
uint32_t gpu_id;
int res = -ENODEV;
down_write(&topology_lock);
- list_for_each_entry(dev, &topology_device_list, list)
+ list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
if (dev->gpu == gpu) {
gpu_id = dev->gpu_id;
kfd_remove_sysfs_node_entry(dev);
kfd_release_topology_device(dev);
+ sys_props.num_devices--;
res = 0;
if (kfd_topology_update_sysfs() < 0)
kfd_topology_release_sysfs();
@@ -1201,28 +1294,32 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
up_write(&topology_lock);
- if (res == 0)
+ if (!res)
kfd_notify_gpu_change(gpu_id, 0);
return res;
}
-/*
- * When idx is out of bounds, the function will return NULL
+/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
+ * topology. If GPU device is found @idx, then valid kfd_dev pointer is
+ * returned through @kdev
+ * Return - 0: On success (@kdev will be NULL for non GPU nodes)
+ * -1: If end of list
*/
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
{
struct kfd_topology_device *top_dev;
- struct kfd_dev *device = NULL;
uint8_t device_idx = 0;
+ *kdev = NULL;
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list) {
if (device_idx == idx) {
- device = top_dev->gpu;
- break;
+ *kdev = top_dev->gpu;
+ up_read(&topology_lock);
+ return 0;
}
device_idx++;
@@ -1230,6 +1327,88 @@ struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
up_read(&topology_lock);
- return device;
+ return -1;
+
+}
+
+static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
+{
+ const struct cpuinfo_x86 *cpuinfo;
+ int first_cpu_of_numa_node;
+
+ if (!cpumask || cpumask == cpu_none_mask)
+ return -1;
+ first_cpu_of_numa_node = cpumask_first(cpumask);
+ if (first_cpu_of_numa_node >= nr_cpu_ids)
+ return -1;
+ cpuinfo = &cpu_data(first_cpu_of_numa_node);
+ return cpuinfo->apicid;
}
+
+/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
+ * of the given NUMA node (numa_node_id)
+ * Return -1 on failure
+ */
+int kfd_numa_node_to_apic_id(int numa_node_id)
+{
+ if (numa_node_id == -1) {
+ pr_warn("Invalid NUMA Node. Use online CPU mask\n");
+ return kfd_cpumask_to_apic_id(cpu_online_mask);
+ }
+ return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+{
+ struct kfd_topology_device *dev;
+ unsigned int i = 0;
+ int r = 0;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (!dev->gpu) {
+ i++;
+ continue;
+ }
+
+ seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+ r = dqm_debugfs_hqds(m, dev->gpu->dqm);
+ if (r)
+ break;
+ }
+
+ up_read(&topology_lock);
+
+ return r;
+}
+
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
+{
+ struct kfd_topology_device *dev;
+ unsigned int i = 0;
+ int r = 0;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (!dev->gpu) {
+ i++;
+ continue;
+ }
+
+ seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+ r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
+ if (r)
+ break;
+ }
+
+ up_read(&topology_lock);
+
+ return r;
+}
+
+#endif