1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
|
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2020-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef GAUDI2P_H_
#define GAUDI2P_H_
#include <uapi/misc/habanalabs.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include "../include/gaudi2/gaudi2.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
#include "../include/gaudi2/gaudi2_async_events.h"
/* File names of the Linux firmware image and of the boot-fit image */
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
/* Initial size of the MMU page tables area */
#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */
/* CPU-related timeout, in microseconds (per the _USEC suffix) */
#define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */
/*
 * Per engine type counts. Their sum is multiplied by NUM_OF_PQ_PER_QMAN below,
 * so each value presumably counts QMANs rather than individual queues.
 */
#define NUMBER_OF_PDMA_QUEUES 2
#define NUMBER_OF_EDMA_QUEUES 8
#define NUMBER_OF_MME_QUEUES 4
#define NUMBER_OF_TPC_QUEUES 25
#define NUMBER_OF_NIC_QUEUES 24
#define NUMBER_OF_ROT_QUEUES 2
#define NUMBER_OF_CPU_QUEUES 1
/* Sum of all the counts above, times the number of PQs per QMAN */
#define NUMBER_OF_HW_QUEUES ((NUMBER_OF_PDMA_QUEUES + \
NUMBER_OF_EDMA_QUEUES + \
NUMBER_OF_MME_QUEUES + \
NUMBER_OF_TPC_QUEUES + \
NUMBER_OF_NIC_QUEUES + \
NUMBER_OF_ROT_QUEUES + \
NUMBER_OF_CPU_QUEUES) * \
NUM_OF_PQ_PER_QMAN)
/*
 * NOTE(review): NUMBER_OF_CPU_QUEUES is already part of the NUMBER_OF_HW_QUEUES
 * sum (where it is scaled by NUM_OF_PQ_PER_QMAN) and is added once more here.
 * Confirm that counting it twice is intended.
 */
#define NUMBER_OF_QUEUES (NUMBER_OF_CPU_QUEUES + NUMBER_OF_HW_QUEUES)
/*
 * Number of SOB registers in the dcore 0 sync manager, derived from the
 * register map: the byte distance between the first and the last SOB register,
 * plus 4 bytes for the last register itself, divided by 4.
 */
#define DCORE_NUM_OF_SOB \
(((mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_8191 - \
mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
/* Same computation for the monitor status registers */
#define DCORE_NUM_OF_MONITORS \
(((mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_2047 - \
mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)
/* Total number of decoders: the per-dcore decoders plus the PCIe decoders */
#define NUMBER_OF_DEC ((NUM_OF_DEC_PER_DCORE * NUM_OF_DCORES) + NUMBER_OF_PCIE_DEC)
/* Map all arcs dccm + arc schedulers acp blocks */
#define NUM_OF_USER_ACP_BLOCKS (NUM_OF_SCHEDULER_ARC + 2)
/* Number of user-mapped UMR blocks per NIC engine */
#define NUM_OF_USER_NIC_UMR_BLOCKS 15
/* Two SM blocks for every dcore but one */
#define NUM_OF_EXPOSED_SM_BLOCKS ((NUM_OF_DCORES - 1) * 2)
/*
 * Total number of blocks the user can map: ARC CPUs, ACP blocks, decoders,
 * exposed SM blocks and the NIC UMR blocks of all NIC engines.
 */
#define NUM_USER_MAPPED_BLOCKS \
(NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS + NUMBER_OF_DEC + \
NUM_OF_EXPOSED_SM_BLOCKS + \
(NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS))
/* Within the user mapped array, decoder entries start after all the ARC CPU,
 * ACP and NIC UMR entries
 */
#define USR_MAPPED_BLK_DEC_START_IDX \
(NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS + \
(NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS))
/* SM entries start right after the decoder entries */
#define USR_MAPPED_BLK_SM_START_IDX \
(NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS + NUMBER_OF_DEC + \
(NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS))
/* Byte distance from the first SOB register to the SM_SEC_0 register */
#define SM_OBJS_BLOCK_SIZE (mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - \
mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0)
/*
 * Maximum number of pending command submissions. The value is validated at
 * build time by IS_MAX_PENDING_CS_VALID(); it also sizes the reserved
 * CS-completion SOB and monitor ranges in the enums below.
 */
#define GAUDI2_MAX_PENDING_CS 64
#if !IS_MAX_PENDING_CS_VALID(GAUDI2_MAX_PENDING_CS)
#error "GAUDI2_MAX_PENDING_CS must be power of 2 and greater than 1"
#endif
/* Timeouts, in microseconds (per the _USEC suffix) */
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
/* NIC clock frequency, in Hz */
#define GAUDI2_NIC_CLK_FREQ 450000000ull /* 450 MHz */
/* Default DC power; the 60W note implies the value is in milliwatts */
#define DC_POWER_DEFAULT 60000 /* 60W */
/* Number of HBM devices */
#define GAUDI2_HBM_NUM 6
#define DMA_MAX_TRANSFER_SIZE U32_MAX
#define GAUDI2_DEFAULT_CARD_NAME "HL225"
#define QMAN_STREAMS 4
#define PQ_FETCHER_CACHE_SIZE 8
#define NUM_OF_MME_SBTE_PORTS 5
#define NUM_OF_MME_WB_PORTS 2
/*
 * Distance between the engine IDs of EDMA 0 in dcore 0 and in dcore 1.
 * NOTE(review): presumably the same spacing holds for every engine type and
 * every pair of consecutive dcores - confirm against the engine ID enum.
 */
#define GAUDI2_ENGINE_ID_DCORE_OFFSET \
(GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
/* DRAM Memory Map */
/* Size of the CPU firmware image region */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
/* This define should be used only when working in a debug mode without dram.
 * When working with dram, the driver size will be calculated dynamically.
 */
#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */
/* The CPU firmware image is placed at the very start of the DRAM */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
/* One NIC port per NIC engine */
#define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES
/* Number of PCIe decoders (counted in NUMBER_OF_DEC in addition to the dcore ones) */
#define NUMBER_OF_PCIE_DEC 2
#define PCIE_DEC_SHIFT 8
#define SRAM_USER_BASE_OFFSET 0
/* cluster binning */
/* Maximum number of faulty HBMs */
#define MAX_FAULTY_HBMS 1
/* Full masks: 4 bits for XBAR edges, 8 bits for EDMAs, 6 bits for DRAM (HBM) */
#define GAUDI2_XBAR_EDGE_FULL_MASK 0xF
#define GAUDI2_EDMA_FULL_MASK 0xFF
#define GAUDI2_DRAM_FULL_MASK 0x3F
/*
 * Host virtual address space.
 * It is made of two adjacent ranges of equal size: the PAGE range, followed
 * immediately by the HPAGE range (presumably regular pages and huge pages).
 * Each range spans 0x800000000000 bytes, i.e. 128TiB (~140TB).
 */
#define VA_HOST_SPACE_PAGE_START 0xFFF0000000000000ull
#define VA_HOST_SPACE_PAGE_END 0xFFF0800000000000ull /* start + 128TiB (~140TB) */
#define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull
#define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* start + 128TiB (~140TB) */
/* 128TiB (~140TB) */
#define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START)
/* 128TiB (~140TB) */
#define VA_HOST_SPACE_HPAGE_SIZE (VA_HOST_SPACE_HPAGE_END - VA_HOST_SPACE_HPAGE_START)
/* Size of both ranges together */
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_PAGE_SIZE + VA_HOST_SPACE_HPAGE_SIZE)
/* Size reserved in the host space for the internal CB */
#define HOST_SPACE_INTERNAL_CB_SZ SZ_2M
/*
 * HBM virtual address space
 * Gaudi2 has 6 HBM devices, each supporting 16GB, for a total of 96GB at most.
 * No core separation is supported, so we can have one chunk of virtual address
 * space just above the physical ones.
 * The virtual address space starts immediately after the end of the physical
 * address space, which is determined at run-time. Only its end is a
 * compile-time constant.
 */
#define VA_HBM_SPACE_END 0x1002000000000000ull
/*
 * H/W capability bits (presumably the bits of gaudi2_device.hw_cap_initialized,
 * which is a u64). Bits above 31 are in use, so every definition must be a
 * 64-bit constant: use BIT_ULL()/GENMASK_ULL() and never BIT(), whose type is
 * unsigned long and therefore too narrow on 32-bit builds.
 */
#define HW_CAP_PLL BIT_ULL(0)
#define HW_CAP_DRAM BIT_ULL(1)
#define HW_CAP_PMMU BIT_ULL(2)
#define HW_CAP_CPU BIT_ULL(3)
#define HW_CAP_MSIX BIT_ULL(4)
#define HW_CAP_CPU_Q BIT_ULL(5)
#define HW_CAP_CPU_Q_SHIFT 5
#define HW_CAP_CLK_GATE BIT_ULL(6)
#define HW_CAP_KDMA BIT_ULL(7)
#define HW_CAP_SRAM_SCRAMBLER BIT_ULL(8)
/* One bit per DMMU: four per dcore, bits 9..24 */
#define HW_CAP_DCORE0_DMMU0 BIT_ULL(9)
#define HW_CAP_DCORE0_DMMU1 BIT_ULL(10)
#define HW_CAP_DCORE0_DMMU2 BIT_ULL(11)
#define HW_CAP_DCORE0_DMMU3 BIT_ULL(12)
#define HW_CAP_DCORE1_DMMU0 BIT_ULL(13)
#define HW_CAP_DCORE1_DMMU1 BIT_ULL(14)
#define HW_CAP_DCORE1_DMMU2 BIT_ULL(15)
#define HW_CAP_DCORE1_DMMU3 BIT_ULL(16)
#define HW_CAP_DCORE2_DMMU0 BIT_ULL(17)
#define HW_CAP_DCORE2_DMMU1 BIT_ULL(18)
#define HW_CAP_DCORE2_DMMU2 BIT_ULL(19)
#define HW_CAP_DCORE2_DMMU3 BIT_ULL(20)
#define HW_CAP_DCORE3_DMMU0 BIT_ULL(21)
#define HW_CAP_DCORE3_DMMU1 BIT_ULL(22)
#define HW_CAP_DCORE3_DMMU2 BIT_ULL(23)
#define HW_CAP_DCORE3_DMMU3 BIT_ULL(24)
#define HW_CAP_DMMU_MASK GENMASK_ULL(24, 9)
#define HW_CAP_DMMU_SHIFT 9
/* Bit 25 is not assigned in this header */
#define HW_CAP_PDMA_MASK BIT_ULL(26)
#define HW_CAP_EDMA_MASK GENMASK_ULL(34, 27)
#define HW_CAP_EDMA_SHIFT 27
#define HW_CAP_MME_MASK GENMASK_ULL(38, 35)
#define HW_CAP_MME_SHIFT 35
#define HW_CAP_ROT_MASK GENMASK_ULL(40, 39)
#define HW_CAP_ROT_SHIFT 39
#define HW_CAP_HBM_SCRAMBLER_HW_RESET BIT_ULL(41)
#define HW_CAP_HBM_SCRAMBLER_SW_RESET BIT_ULL(42)
#define HW_CAP_HBM_SCRAMBLER_MASK (HW_CAP_HBM_SCRAMBLER_HW_RESET | \
	HW_CAP_HBM_SCRAMBLER_SW_RESET)
#define HW_CAP_HBM_SCRAMBLER_SHIFT 41
/* 64-bit like all the other capability bits (was BIT(43)) */
#define HW_CAP_RESERVED BIT_ULL(43)
/* The PMMU together with all the DMMUs */
#define HW_CAP_MMU_MASK (HW_CAP_PMMU | HW_CAP_DMMU_MASK)
/* Range Registers */
/* Range register types: short or long, each in a regular and a PRIV flavor */
#define RR_TYPE_SHORT 0
#define RR_TYPE_LONG 1
#define RR_TYPE_SHORT_PRIV 2
#define RR_TYPE_LONG_PRIV 3
/* Number of short and long range registers, for LBW and for HBW */
#define NUM_SHORT_LBW_RR 14
#define NUM_LONG_LBW_RR 4
#define NUM_SHORT_HBW_RR 6
#define NUM_LONG_HBW_RR 4
/*
 * RAZWI initiator coordinates.
 * The X coordinate is 5 bits wide and sits in bits 4:0, the Y coordinate is
 * 4 bits wide and sits in bits 8:5.
 */
#define RAZWI_INITIATOR_X_SHIFT 0
#define RAZWI_INITIATOR_X_MASK 0x1F
#define RAZWI_INITIATOR_Y_SHIFT 5
#define RAZWI_INITIATOR_Y_MASK 0xF
/* Pack an (x, y) pair into a router ID; out-of-range bits are masked off */
#define RTR_ID_X_Y(x, y) \
	((((x) & RAZWI_INITIATOR_X_MASK) << RAZWI_INITIATOR_X_SHIFT) | \
	 (((y) & RAZWI_INITIATOR_Y_MASK) << RAZWI_INITIATOR_Y_SHIFT))
/* decoders have separate mask: bits 9:0, i.e. 10 bits */
#define HW_CAP_DEC_SHIFT 0
#define HW_CAP_DEC_MASK GENMASK_ULL(9, 0)
/* TPCs have separate mask: bits 24:0, i.e. 25 bits */
#define HW_CAP_TPC_SHIFT 0
#define HW_CAP_TPC_MASK GENMASK_ULL(24, 0)
/* nics have separate mask: one bit per NIC engine */
#define HW_CAP_NIC_SHIFT 0
#define HW_CAP_NIC_MASK GENMASK_ULL(NIC_NUMBER_OF_ENGINES - 1, 0)
/* Extract bits 49:28 of an address, shifted down to bit 0 */
#define GAUDI2_ARC_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 28)) >> 28)
/* SOB register value with both the VAL field and the INC field set to 1 */
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
/*
 * IDs of the reserved SOBs, laid out as consecutive ranges:
 * - GAUDI2_MAX_PENDING_CS entries for CS completion
 * - a single entry for KDMA completion
 * - NUMBER_OF_DEC entries for decoder NRM and then NUMBER_OF_DEC entries for
 *   decoder ABNRM (presumably normal / abnormal interrupts)
 * GAUDI2_RESERVED_SOB_NUMBER is the total number of reserved SOBs.
 */
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1,
GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
GAUDI2_RESERVED_SOB_DEC_NRM_FIRST,
GAUDI2_RESERVED_SOB_DEC_NRM_LAST =
GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + NUMBER_OF_DEC - 1,
GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST,
GAUDI2_RESERVED_SOB_DEC_ABNRM_LAST =
GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + NUMBER_OF_DEC - 1,
GAUDI2_RESERVED_SOB_NUMBER
};
/*
 * IDs of the reserved monitors, laid out as consecutive ranges:
 * - GAUDI2_MAX_PENDING_CS entries for CS completion
 * - a single entry for KDMA completion
 * - 3 * NUMBER_OF_DEC entries for decoder NRM and then 3 * NUMBER_OF_DEC
 *   entries for decoder ABNRM, i.e. three monitors per decoder in each range
 * GAUDI2_RESERVED_MON_NUMBER is the total number of reserved monitors.
 */
enum gaudi2_reserved_mon_id {
GAUDI2_RESERVED_MON_CS_COMPLETION_FIRST,
GAUDI2_RESERVED_MON_CS_COMPLETION_LAST =
GAUDI2_RESERVED_MON_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1,
GAUDI2_RESERVED_MON_KDMA_COMPLETION,
GAUDI2_RESERVED_MON_DEC_NRM_FIRST,
GAUDI2_RESERVED_MON_DEC_NRM_LAST =
GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * NUMBER_OF_DEC - 1,
GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST,
GAUDI2_RESERVED_MON_DEC_ABNRM_LAST =
GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * NUMBER_OF_DEC - 1,
GAUDI2_RESERVED_MON_NUMBER
};
/*
 * IDs of the reserved CQs: one for CS completion and one for KDMA completion.
 * GAUDI2_RESERVED_CQ_NUMBER is the number of reserved CQs.
 */
enum gaudi2_reserved_cq_id {
	GAUDI2_RESERVED_CQ_CS_COMPLETION = 0,
	GAUDI2_RESERVED_CQ_KDMA_COMPLETION,
	GAUDI2_RESERVED_CQ_NUMBER
};
/*
 * Gaudi2 substitute TPCs numbering.
 * At most two faulty TPCs are allowed.
 * The first faulty TPC is replaced by TPC24, the second one by TPC23.
 * MAX_FAULTY_TPCS is the number of available substitutes.
 */
enum substitude_tpc {
	FAULTY_TPC_SUBTS_1_TPC_24 = 0,
	FAULTY_TPC_SUBTS_2_TPC_23,
	MAX_FAULTY_TPCS
};
/*
 * DMA core IDs: the two PDMAs, the eight EDMAs (two per dcore) and the KDMA.
 * DMA_CORE_ID_SIZE is the number of DMA cores.
 */
enum gaudi2_dma_core_id {
	/* Dcore 0 */
	DMA_CORE_ID_PDMA0 = 0,
	DMA_CORE_ID_PDMA1,
	DMA_CORE_ID_EDMA0,
	DMA_CORE_ID_EDMA1,
	/* Dcore 1 */
	DMA_CORE_ID_EDMA2,
	DMA_CORE_ID_EDMA3,
	/* Dcore 2 */
	DMA_CORE_ID_EDMA4,
	DMA_CORE_ID_EDMA5,
	/* Dcore 3 */
	DMA_CORE_ID_EDMA6,
	DMA_CORE_ID_EDMA7,
	/* Dcore 0 */
	DMA_CORE_ID_KDMA,
	DMA_CORE_ID_SIZE
};
/* Rotator IDs; ROTATOR_ID_SIZE is the number of rotators */
enum gaudi2_rotator_id {
	ROTATOR_ID_0 = 0,
	ROTATOR_ID_1,
	ROTATOR_ID_SIZE,
};
/* MME IDs, one per dcore; MME_ID_SIZE is the number of MMEs */
enum gaudi2_mme_id {
	MME_ID_DCORE0 = 0,
	MME_ID_DCORE1,
	MME_ID_DCORE2,
	MME_ID_DCORE3,
	MME_ID_SIZE,
};
/*
 * TPC IDs: TPC0..TPC5 of each of the four dcores, followed by TPC6 of dcore 0.
 * TPC_ID_SIZE is the number of TPCs.
 */
enum gaudi2_tpc_id {
	/* Dcore 0 */
	TPC_ID_DCORE0_TPC0 = 0,
	TPC_ID_DCORE0_TPC1,
	TPC_ID_DCORE0_TPC2,
	TPC_ID_DCORE0_TPC3,
	TPC_ID_DCORE0_TPC4,
	TPC_ID_DCORE0_TPC5,
	/* Dcore 1 */
	TPC_ID_DCORE1_TPC0,
	TPC_ID_DCORE1_TPC1,
	TPC_ID_DCORE1_TPC2,
	TPC_ID_DCORE1_TPC3,
	TPC_ID_DCORE1_TPC4,
	TPC_ID_DCORE1_TPC5,
	/* Dcore 2 */
	TPC_ID_DCORE2_TPC0,
	TPC_ID_DCORE2_TPC1,
	TPC_ID_DCORE2_TPC2,
	TPC_ID_DCORE2_TPC3,
	TPC_ID_DCORE2_TPC4,
	TPC_ID_DCORE2_TPC5,
	/* Dcore 3 */
	TPC_ID_DCORE3_TPC0,
	TPC_ID_DCORE3_TPC1,
	TPC_ID_DCORE3_TPC2,
	TPC_ID_DCORE3_TPC3,
	TPC_ID_DCORE3_TPC4,
	TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like HW) */
	TPC_ID_DCORE0_TPC6,
	TPC_ID_SIZE,
};
/*
 * Decoder IDs: two decoders per dcore, followed by the two PCIe decoders.
 * DEC_ID_SIZE is the number of decoders.
 */
enum gaudi2_dec_id {
	/* Dcore decoders */
	DEC_ID_DCORE0_DEC0 = 0,
	DEC_ID_DCORE0_DEC1,
	DEC_ID_DCORE1_DEC0,
	DEC_ID_DCORE1_DEC1,
	DEC_ID_DCORE2_DEC0,
	DEC_ID_DCORE2_DEC1,
	DEC_ID_DCORE3_DEC0,
	DEC_ID_DCORE3_DEC1,
	/* PCIe decoders */
	DEC_ID_PCIE_VDEC0,
	DEC_ID_PCIE_VDEC1,
	DEC_ID_SIZE,
};
/* HBM IDs; HBM_ID_SIZE is the number of HBMs */
enum gaudi2_hbm_id {
	HBM_ID0 = 0,
	HBM_ID1,
	HBM_ID2,
	HBM_ID3,
	HBM_ID4,
	HBM_ID5,
	HBM_ID_SIZE,
};
/*
 * EDMA specific enumeration: two EDMA instances per dcore.
 * EDMA_ID_SIZE is the number of EDMAs.
 */
enum gaudi2_edma_id {
	EDMA_ID_DCORE0_INSTANCE0 = 0,
	EDMA_ID_DCORE0_INSTANCE1,
	EDMA_ID_DCORE1_INSTANCE0,
	EDMA_ID_DCORE1_INSTANCE1,
	EDMA_ID_DCORE2_INSTANCE0,
	EDMA_ID_DCORE2_INSTANCE1,
	EDMA_ID_DCORE3_INSTANCE0,
	EDMA_ID_DCORE3_INSTANCE1,
	EDMA_ID_SIZE,
};
/* User interrupt count is aligned with HW CQ count.
 * We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode
 * (with the four dcores seen in the enums above: 4 * 64 - 1 = 255).
 */
#define GAUDI2_NUM_USER_INTERRUPTS 255
/*
 * Interrupt numbering. The layout is, in order: the event queue, one NRM and
 * one ABNRM interrupt per decoder, the completion interrupt, one interrupt per
 * NIC port, a reserved range, and finally the user interrupts, which occupy
 * the last GAUDI2_NUM_USER_INTERRUPTS entries out of GAUDI2_MSIX_ENTRIES.
 */
enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_EVENT_QUEUE = GAUDI2_EVENT_QUEUE_MSIX_IDX,
/* Dcore decoders */
GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE0_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE0_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE0_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DCORE1_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE1_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE1_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE1_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DCORE2_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE2_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE2_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE2_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DCORE3_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE3_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE3_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE3_DEC1_ABNRM,
/* Shared decoders */
GAUDI2_IRQ_NUM_SHARED_DEC0_NRM,
GAUDI2_IRQ_NUM_SHARED_DEC0_ABNRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_NRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM,
GAUDI2_IRQ_NUM_COMPLETION,
/* One interrupt per NIC port */
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
/* Reserved range: everything between the NIC ports and the user interrupts */
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_USER_INTERRUPTS - 1),
/* User interrupts; by the arithmetic above USER_LAST equals GAUDI2_IRQ_NUM_LAST */
GAUDI2_IRQ_NUM_USER_FIRST,
GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1)
};
/* Build-time check: the user interrupts must start after the last decoder interrupt */
static_assert(GAUDI2_IRQ_NUM_USER_FIRST > GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM);
/**
 * struct dup_block_ctx - context to initialize unit instances across multiple
 *                        blocks, where a block can be either a dcore or a
 *                        duplicated common module. This code relies on
 *                        constant offsets of blocks and of unit instances
 *                        within a block.
 * @instance_cfg_fn: instance specific configuration function. It takes the
 *                   device, a u64 base address and an opaque data pointer.
 * @data: private configuration data.
 * @base: base address of the first instance in the first block.
 * @block_off: subsequent blocks address spacing.
 * @instance_off: subsequent block's instances address spacing.
 * @enabled_mask: mask of enabled instances (1- enabled, 0- disabled).
 * @blocks: number of blocks.
 * @instances: unit instances per block.
 */
struct dup_block_ctx {
void (*instance_cfg_fn)(struct hl_device *hdev, u64 base, void *data);
void *data;
u64 base;
u64 block_off;
u64 instance_off;
u64 enabled_mask;
unsigned int blocks;
unsigned int instances;
};
/**
 * struct gaudi2_device - ASIC specific manage structure.
 * @cpucp_info_get: get information on device from CPU-CP
 * @mapped_blocks: array that holds the base address and size of all blocks
 *                 the user can map.
 * @lfsr_rand_seeds: array of MME ACC random seeds to set.
 * @hw_queues_lock: protects the H/W queues from concurrent access.
 * @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory,
 *                             this memory region should be write-only.
 *                             currently used for HBW QMAN writes which is
 *                             redundant.
 * @scratchpad_bus_address: scratchpad bus address
 * @virt_msix_db_cpu_addr: host memory page for the virtual MSI-X doorbell.
 * @virt_msix_db_dma_addr: bus address of the page for the virtual MSI-X doorbell.
 * @dram_bar_cur_addr: current address of DRAM PCI bar.
 * @hw_cap_initialized: This field contains a bit per H/W engine. When that
 *                      engine is initialized, that bit is set by the driver to
 *                      signal we can use this engine in later code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
 * @active_hw_arc: This field contains a bit per ARC of an H/W engine with
 *                 exception of TPC and NIC engines. Once an engine arc is
 *                 initialized, its respective bit is set. Driver can uniquely
 *                 identify each initialized ARC and use this information in
 *                 later code paths. Each respective bit is cleared upon reset
 *                 of its corresponding ARC of the H/W engine.
 * @dec_hw_cap_initialized: This field contains a bit per decoder H/W engine.
 *                      When that engine is initialized, that bit is set by
 *                      the driver to signal we can use this engine in later
 *                      code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
 * @tpc_hw_cap_initialized: This field contains a bit per TPC H/W engine.
 *                      When that engine is initialized, that bit is set by
 *                      the driver to signal we can use this engine in later
 *                      code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
 * @active_tpc_arc: This field contains a bit per ARC of the TPC engines.
 *                  Once an engine arc is initialized, its respective bit is
 *                  set. Each respective bit is cleared upon reset of its
 *                  corresponding ARC of the TPC engine.
 * @nic_hw_cap_initialized: This field contains a bit per nic H/W engine.
 * @active_nic_arc: This field contains a bit per ARC of the NIC engines.
 *                  Once an engine arc is initialized, its respective bit is
 *                  set. Each respective bit is cleared upon reset of its
 *                  corresponding ARC of the NIC engine.
 * @hw_events: array that holds all H/W events that are defined valid.
 * @events_stat: array that holds histogram of all received events.
 * @events_stat_aggregate: same as events_stat but doesn't get cleared on reset.
 * @num_of_valid_hw_events: used to hold the number of valid H/W events.
 */
struct gaudi2_device {
int (*cpucp_info_get)(struct hl_device *hdev);
struct user_mapped_block mapped_blocks[NUM_USER_MAPPED_BLOCKS];
int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS];
spinlock_t hw_queues_lock;
void *scratchpad_kernel_address;
dma_addr_t scratchpad_bus_address;
void *virt_msix_db_cpu_addr;
dma_addr_t virt_msix_db_dma_addr;
u64 dram_bar_cur_addr;
u64 hw_cap_initialized;
u64 active_hw_arc;
u64 dec_hw_cap_initialized;
u64 tpc_hw_cap_initialized;
u64 active_tpc_arc;
u64 nic_hw_cap_initialized;
u64 active_nic_arc;
u32 hw_events[GAUDI2_EVENT_SIZE];
u32 events_stat[GAUDI2_EVENT_SIZE];
u32 events_stat_aggregate[GAUDI2_EVENT_SIZE];
u32 num_of_valid_hw_events;
};
/*
 * Constant u32 lookup tables defined elsewhere in the driver (presumably block
 * base addresses, going by their names). Each table has one entry per value of
 * the ID enum or count used as its size.
 */
extern const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE];
extern const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE];
extern const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE];
extern const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE];
extern const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES];
extern const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE];
/*
 * Functions shared between the Gaudi2 source files. Only the declarations are
 * visible here; see the definitions for the exact semantics.
 */
void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx);
/* CoreSight */
int gaudi2_coresight_init(struct hl_device *hdev);
int gaudi2_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void gaudi2_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
/* Takes a struct dup_block_ctx describing the blocks/instances to configure */
void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx);
bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id);
/* Range registers; rr_type is presumably one of the RR_TYPE_* values */
void gaudi2_write_rr_to_all_lbw_rtrs(struct hl_device *hdev, u8 rr_type, u32 rr_index, u64 min_val,
u64 max_val);
/* Security */
void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32 cause,
u32 offended_addr);
int gaudi2_init_security(struct hl_device *hdev);
void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
int gaudi2_send_device_activity(struct hl_device *hdev, bool open);
#endif /* GAUDI2P_H_ */
|