summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/xe/xe_gsc.c
blob: 8a137cb833186d049793a44dac670221a4e3a856 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <linux/delay.h>

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc_pc.h"
#include "xe_huc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
#include "regs/xe_gt_regs.h"

/* Map a GSC uC structure back to the GT that embeds it (as gt->uc.gsc). */
static struct xe_gt *gsc_to_gt(struct xe_gsc *gsc)
{
	struct xe_gt *owner = container_of(gsc, struct xe_gt, uc.gsc);

	return owner;
}

/*
 * Copy the GSC firmware image from the firmware BO into the private GSC BO,
 * going through a temporary kernel buffer, and zero the part of the private
 * BO that lies past the end of the image.
 *
 * Returns 0 on success, -ENOMEM if the temporary buffer cannot be allocated.
 */
static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_device *xe = gt_to_xe(gsc_to_gt(gsc));
	u32 image_len = gsc->fw.size;
	void *bounce;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	bounce = kmalloc(image_len, GFP_KERNEL);
	if (!bounce)
		return -ENOMEM;

	/* firmware BO -> bounce buffer -> start of the private BO */
	xe_map_memcpy_from(xe, bounce, &gsc->fw.bo->vmap, 0, image_len);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, bounce, image_len);
	kfree(bounce);

	/* clear everything in the private BO that follows the image */
	xe_map_memset(xe, &gsc->private->vmap, image_len, 0,
		      gsc->private->size - image_len);

	return 0;
}

static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

/*
 * Field accessors for the compatibility version query message:
 * version_query_wr() writes a field of struct
 * gsc_get_compatibility_version_in (the request) and version_query_rd()
 * reads a field of struct gsc_get_compatibility_version_out (the reply),
 * both at the given offset inside the mapped buffer.
 */
#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

/*
 * Write a zeroed "get host compatibility version" MKHI request at @wr_offset
 * in @map and fill in its header group id and command.
 *
 * Returns the offset of the first byte after the request.
 */
static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	const u32 msg_len = sizeof(struct gsc_get_compatibility_version_in);

	/* start from a clean message so unset fields are zero */
	xe_map_memset(xe, map, wr_offset, 0, msg_len);

	version_query_wr(xe, map, wr_offset, header.group_id,
			 MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + msg_len;
}

#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */

/*
 * Ask the GSC firmware for its compatibility version and store the result in
 * gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY].
 *
 * A temporary BO of two GSC_VER_PKT_SZ halves is allocated: the first half
 * holds the request, the second half receives the reply. The BO is released
 * on every exit path once it has been created.
 *
 * Returns 0 on success or a negative error code if the BO allocation, the
 * submission or the reply header validation fails.
 */
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;

	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM |
				  XE_BO_FLAG_GGTT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	/* request: GSC header followed by the MKHI version query message */
	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	/* input is the first half of the BO, output the second half */
	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt,
			  "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
		/* the BO must still be released, do not return directly */
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

static int gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
			      HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}

/*
 * Load the GSC firmware: copy the image into the private BO, submit the load
 * command, wait for the firmware to report init complete, then query and
 * validate its compatibility version.
 *
 * Returns 0 on success, -EEXIST if the firmware is already loaded when we get
 * here, or the negative error code of the step that failed.
 */
static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	/* reaching this point implies the init steps completed successfully */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	ret = memcpy_fw(gsc);
	if (ret) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return ret;
	}

	/*
	 * Only an FLR kills the GSC, so one has to be triggered on unload to
	 * be sure it is stopped. The FW load hands a chunk of memory to the
	 * GSC, and the GSC must no longer be using it by the time that memory
	 * goes back to the system on driver unload. The unload itself is not
	 * the problem: the GSC only touches that memory in response to
	 * requests from the driver, so nothing is accessed while Xe is not
	 * loaded. If the driver is loaded again, however, the GSC might wake
	 * up and try to access the old memory location.
	 * An FLR is very disruptive (see the FLR function for details), so it
	 * should be the last action before access to the MMIO bar is
	 * released, i.e. it has to happen as part of mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	ret = emit_gsc_upload(gsc);
	if (ret) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(ret));
		return ret;
	}

	ret = gsc_fw_wait(gt);
	if (ret) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(ret));
		return ret;
	}

	ret = query_compatibility_version(gsc);
	if (ret)
		return ret;

	return xe_uc_fw_check_version_requirements(&gsc->fw);
}

/*
 * Upload the GSC firmware (applying Wa_14018094691 around the upload when it
 * is active on the primary GT) and then run the post-load steps: mark the FW
 * as transferred, restore GT frequencies, attempt HuC authentication via GSC
 * and start the GSC proxy.
 *
 * Returns 0 on success or the negative error code from the upload or from
 * xe_gsc_proxy_start().
 */
static int gsc_upload_and_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_gt *primary = tile->primary_gt;
	struct xe_huc *huc = &gt->uc.huc;
	int err;

	if (XE_WA(primary, 14018094691)) {
		err = xe_force_wake_get(gt_to_fw(primary), XE_FORCEWAKE_ALL);

		/*
		 * A forcewake failure is not a reason to stop: at worst the WA
		 * is not applied and PXP won't work, which is not fatal. Warn
		 * anyway so that the problem is visible if it happens.
		 */
		xe_gt_WARN_ON(primary, err);

		xe_gt_mcr_multicast_write(primary,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
	}

	err = gsc_upload(gsc);

	/* drop the WA forcewake whether or not the upload succeeded */
	if (XE_WA(primary, 14018094691))
		xe_force_wake_put(gt_to_fw(primary), XE_FORCEWAKE_ALL);

	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);

	/* the GSC load is over, go back to the expected GT frequencies */
	xe_gt_sanitize_freq(gt);

	xe_gt_dbg(gt, "GSC FW async load completed\n");

	/* a failure to authenticate the HuC is not fatal, so it is ignored */
	if (xe_huc_is_authenticated(huc, XE_HUC_AUTH_VIA_GUC))
		xe_huc_auth(huc, XE_HUC_AUTH_VIA_GSC);

	err = xe_gsc_proxy_start(gsc);
	if (err)
		return err;

	xe_gt_dbg(gt, "GSC proxy init completed\n");

	return 0;
}

/*
 * Handle a GSC engine-reset-complete notification by checking whether the
 * background shim reset succeeded.
 *
 * Returns 0 if the GSC FW is not loaded or the reset did not time out, -EIO
 * if the timer status reports an expired timer.
 */
static int gsc_er_complete(struct xe_gt *gt)
{
	u32 timer_status;

	if (!gsc_fw_is_loaded(gt))
		return 0;

	/*
	 * From Xe2 onwards a GSCCS engine reset happens in two steps. Hitting
	 * the GDRST register (from the driver or the GuC) resets the CS right
	 * away and reports success, while the GSC shim continues resetting in
	 * the background. During that time the CS accepts new context
	 * submissions, but commands that need the shim stall until the reset
	 * is done. Submissions to the GSCCS can therefore continue, provided
	 * the preemption timeout is large enough to absorb the delay caused by
	 * the reset.
	 * Completion of the shim reset raises a dedicated CS interrupt; in
	 * response we have to look at GSCI_TIMER_STATUS to find out whether
	 * the reset succeeded.
	 * GSCI_TIMER_STATUS is not power save/restored and so is reset on MC6
	 * entry. A failed reset prevents MC6 though, so in the failure case
	 * the register is always guaranteed to hold the correct value.
	 */
	timer_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
	if (timer_status != GSCI_TIMER_STATUS_TIMER_EXPIRED)
		return 0;

	/*
	 * XXX: we should trigger an FLR here, but we don't have support
	 * for that yet.
	 */
	xe_gt_err(gt, "GSC ER timed out!\n");

	return -EIO;
}

/*
 * GSC worker: consume the pending action bits and process them in order
 * (engine reset completion, firmware load, proxy request) while holding a
 * runtime PM reference and the GSC forcewake. A failed engine reset check
 * skips the remaining actions.
 */
static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 pending;

	/* snapshot and clear the requested actions under the lock */
	spin_lock_irq(&gsc->lock);
	pending = gsc->work_actions;
	gsc->work_actions = 0;
	spin_unlock_irq(&gsc->lock);

	xe_pm_runtime_get(xe);
	xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));

	if ((pending & GSC_ACTION_ER_COMPLETE) && gsc_er_complete(gt))
		goto release;

	if (pending & GSC_ACTION_FW_LOAD) {
		int load_err = gsc_upload_and_init(gsc);

		/* -EEXIST (already loaded) is treated like a successful load */
		if (!load_err || load_err == -EEXIST)
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
		else
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
	}

	if (pending & GSC_ACTION_SW_PROXY)
		xe_gsc_proxy_request_handler(gsc);

release:
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_pm_runtime_put(xe);
}

/*
 * Interrupt handler for the GSC hardware engine. When @intr_vec carries the
 * GSC_ER_COMPLETE bit, flag the engine-reset-complete action and queue the
 * GSC worker to process it; any other vector is ignored.
 */
void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
{
	struct xe_gsc *gsc = &hwe->gt->uc.gsc;

	if (unlikely(!intr_vec))
		return;

	if (!(intr_vec & GSC_ER_COMPLETE))
		return;

	spin_lock(&gsc->lock);
	gsc->work_actions |= GSC_ACTION_ER_COMPLETE;
	spin_unlock(&gsc->lock);

	queue_work(gsc->wq, &gsc->work);
}

/*
 * Early GSC initialization: set up the worker and lock, fetch the firmware
 * and initialize the GSC proxy.
 *
 * Returns 0 on success, and also when the GSC is not supported on this GT or
 * its firmware is not enabled; otherwise the negative error code from
 * xe_uc_fw_init() or xe_gsc_proxy_init(). -ENODEV from the proxy init is not
 * treated as a failure.
 */
int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);
	spin_lock_init(&gsc->lock);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
	 * all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will keep
	 * going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	ret = xe_gsc_proxy_init(gsc);
	if (ret && ret != -ENODEV)
		goto out;

	return 0;

out:
	/* terminate the message with a newline like every other log in this file */
	xe_gt_err(gt, "GSC init failed with %d\n", ret);
	return ret;
}

/*
 * Release action registered by xe_gsc_init_post_hwconfig(): destroy the GSC
 * workqueue and drop the GSC exec queue reference, clearing both pointers.
 * @arg is the struct xe_gsc the resources belong to.
 */
static void free_resources(void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq)
		destroy_workqueue(gsc->wq);
	gsc->wq = NULL;

	if (gsc->q)
		xe_exec_queue_put(gsc->q);
	gsc->q = NULL;
}

/*
 * Second stage of GSC initialization: allocate the 4M private BO in stolen
 * memory, create the kernel exec queue used for GSC submissions and the
 * ordered workqueue for the GSC worker, then mark the firmware as loadable.
 *
 * Returns 0 on success or when the GSC firmware is not available, -ENODEV if
 * the GSC engine is missing, or a negative error code if any of the
 * allocations fails.
 */
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	/*
	 * The BO is created through the managed helper, which takes care of
	 * releasing it. The error paths below must therefore NOT unpin it
	 * themselves, or it would be released twice.
	 */
	bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M,
					  XE_BO_FLAG_STOLEN |
					  XE_BO_FLAG_GGTT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		return PTR_ERR(q);
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		/* the queue is not yet tracked by free_resources(), drop it here */
		xe_exec_queue_put(q);
		return -ENOMEM;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	/* on failure the _or_reset variant runs free_resources() for us */
	err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;
}

/*
 * Kick off the asynchronous GSC firmware load. Nothing is done if the
 * firmware is not loadable or the GSC exec queue is missing. If the firmware
 * is already loaded, only the FW status is updated; otherwise a load action
 * is flagged and the GSC worker is queued.
 */
void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!(xe_uc_fw_is_loadable(&gsc->fw) && gsc->q))
		return;

	/* the GSC FW survives both GT reset and D3Hot, so it may still be there */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	spin_lock_irq(&gsc->lock);
	gsc->work_actions |= GSC_ACTION_FW_LOAD;
	spin_unlock_irq(&gsc->lock);

	queue_work(gsc->wq, &gsc->work);
}

/*
 * Flush the GSC worker. This is a no-op when the firmware is not loadable or
 * the GSC workqueue was never created.
 */
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->wq)
		return;

	flush_work(&gsc->work);
}

/**
 * xe_gsc_remove() - Clean up the GSC structures before driver removal
 * @gsc: the GSC uC
 *
 * The only cleanup done here is a call to xe_gsc_proxy_remove().
 */
void xe_gsc_remove(struct xe_gsc *gsc)
{
	xe_gsc_proxy_remove(gsc);
}

/*
 * wa_14015076503: when the GSC FW is loaded, it has to be alerted before a
 * GSC engine reset. This is done by setting a notification bit in the GS1
 * register and then triggering an interrupt to the GSC; from that interrupt
 * the FW can take up to 200ms to get prepared for the reset, so we have to
 * wait for that long.
 * Once the reset is complete, the GS1 register has to be cleared again.
 *
 * @prep selects the phase: true before the reset (set the bit, interrupt the
 * GSC and wait), false after it (clear the bit).
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 clear_bits;
	u32 set_bits;

	/* the WA is only relevant while the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	if (prep) {
		clear_bits = 0;
		set_bits = HECI_H_GS1_ER_PREP;
	} else {
		clear_bits = HECI_H_GS1_ER_PREP;
		set_bits = 0;
	}

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), clear_bits, set_bits);

	if (!prep)
		return;

	/* the reset bit must be clear when the CSR register is written */
	xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
		      HECI_H_CSR_RST, HECI_H_CSR_IG);
	msleep(200);
}