From ac9738bb3e5374495908ad236285f69cfd405f8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 10 Aug 2014 04:10:29 +1000 Subject: drm/gf100-/gr: add support for zero bandwidth clear Default ZBC table is compatible with binary driver defaults. Userspace will need to be updated to take full advantage of this feature, however, some applications will see a performance boost without updated drivers. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c | 2 +- drivers/gpu/drm/nouveau/core/engine/graph/gm107.c | 5 +- drivers/gpu/drm/nouveau/core/engine/graph/nv108.c | 2 +- drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c | 219 +++++++++++++++++++++- drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h | 23 +++ drivers/gpu/drm/nouveau/core/engine/graph/nvc1.c | 4 +- drivers/gpu/drm/nouveau/core/engine/graph/nvc8.c | 6 +- drivers/gpu/drm/nouveau/core/engine/graph/nve4.c | 5 +- drivers/gpu/drm/nouveau/core/engine/graph/nvf0.c | 2 +- drivers/gpu/drm/nouveau/nvif/class.h | 56 ++++++ 10 files changed, 313 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c b/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c index c5697133640a..74a51fc2ec8a 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c @@ -27,7 +27,7 @@ static struct nouveau_oclass gk20a_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0xa040, &nouveau_object_ofuncs }, - { 0xa297, &nouveau_object_ofuncs }, + { KEPLER_C, &nvc0_fermi_ofuncs }, { 0xa0c0, &nouveau_object_ofuncs }, {} }; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/gm107.c b/drivers/gpu/drm/nouveau/core/engine/graph/gm107.c index 21c5f31d607f..60d86f314281 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/gm107.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/gm107.c @@ -36,7 +36,7 @@ static struct nouveau_oclass gm107_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0xa140, &nouveau_object_ofuncs }, - { 0xb097, &nouveau_object_ofuncs }, + { MAXWELL_A, &nvc0_fermi_ofuncs }, { 0xb0c0, &nouveau_object_ofuncs }, {} }; @@ -425,6 +425,9 @@ gm107_graph_init(struct nouveau_object *object) nv_wr32(priv, 0x400134, 0xffffffff); nv_wr32(priv, 0x400054, 0x2c350f63); + + nvc0_graph_zbc_init(priv); + return nvc0_graph_init_ctxctl(priv); } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv108.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv108.c index 00ea1a089822..01e99faeb9a0 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nv108.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv108.c @@ -33,7 +33,7 @@ static struct nouveau_oclass nv108_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0xa140, &nouveau_object_ofuncs }, - { 0xa197, &nouveau_object_ofuncs }, + { KEPLER_B, &nvc0_fermi_ofuncs }, { 0xa1c0, &nouveau_object_ofuncs }, {} }; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c index cda70fca9bc4..0156862fb34d 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c @@ -25,15 +25,200 @@ #include "nvc0.h" #include "ctxnvc0.h" +/******************************************************************************* + * Zero Bandwidth Clear + ******************************************************************************/ + +static void +nvc0_graph_zbc_clear_color(struct nvc0_graph_priv *priv, int zbc) +{ + if (priv->zbc_color[zbc].format) { + nv_wr32(priv, 0x405804, priv->zbc_color[zbc].ds[0]); + nv_wr32(priv, 0x405808, priv->zbc_color[zbc].ds[1]); + nv_wr32(priv, 0x40580c, priv->zbc_color[zbc].ds[2]); + nv_wr32(priv, 0x405810, priv->zbc_color[zbc].ds[3]); + } + nv_wr32(priv, 0x405814, priv->zbc_color[zbc].format); + nv_wr32(priv, 0x405820, zbc); + nv_wr32(priv, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */ +} + +static int +nvc0_graph_zbc_color_get(struct nvc0_graph_priv *priv, int format, + const u32 ds[4], const u32 l2[4]) +{ + struct nouveau_ltc *ltc = nouveau_ltc(priv); + int zbc = -ENOSPC, i; + + for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + if (priv->zbc_color[i].format) { + if (priv->zbc_color[i].format != format) + continue; + if (memcmp(priv->zbc_color[i].ds, ds, sizeof( + priv->zbc_color[i].ds))) + continue; + if (memcmp(priv->zbc_color[i].l2, l2, sizeof( + priv->zbc_color[i].l2))) { + WARN_ON(1); + return -EINVAL; + } + return i; + } else { + zbc = (zbc < 0) ? i : zbc; + } + } + + memcpy(priv->zbc_color[zbc].ds, ds, sizeof(priv->zbc_color[zbc].ds)); + memcpy(priv->zbc_color[zbc].l2, l2, sizeof(priv->zbc_color[zbc].l2)); + priv->zbc_color[zbc].format = format; + ltc->zbc_color_get(ltc, zbc, l2); + nvc0_graph_zbc_clear_color(priv, zbc); + return zbc; +} + +static void +nvc0_graph_zbc_clear_depth(struct nvc0_graph_priv *priv, int zbc) +{ + if (priv->zbc_depth[zbc].format) + nv_wr32(priv, 0x405818, priv->zbc_depth[zbc].ds); + nv_wr32(priv, 0x40581c, priv->zbc_depth[zbc].format); + nv_wr32(priv, 0x405820, zbc); + nv_wr32(priv, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */ +} + +static int +nvc0_graph_zbc_depth_get(struct nvc0_graph_priv *priv, int format, + const u32 ds, const u32 l2) +{ + struct nouveau_ltc *ltc = nouveau_ltc(priv); + int zbc = -ENOSPC, i; + + for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + if (priv->zbc_depth[i].format) { + if (priv->zbc_depth[i].format != format) + continue; + if (priv->zbc_depth[i].ds != ds) + continue; + if (priv->zbc_depth[i].l2 != l2) { + WARN_ON(1); + return -EINVAL; + } + return i; + } else { + zbc = (zbc < 0) ? i : zbc; + } + } + + priv->zbc_depth[zbc].format = format; + priv->zbc_depth[zbc].ds = ds; + priv->zbc_depth[zbc].l2 = l2; + ltc->zbc_depth_get(ltc, zbc, l2); + nvc0_graph_zbc_clear_depth(priv, zbc); + return zbc; +} + /******************************************************************************* * Graphics object classes ******************************************************************************/ +static int +nvc0_fermi_mthd_zbc_color(struct nouveau_object *object, void *data, u32 size) +{ + struct nvc0_graph_priv *priv = (void *)object->engine; + union { + struct fermi_a_zbc_color_v0 v0; + } *args = data; + int ret; + + if (nvif_unpack(args->v0, 0, 0, false)) { + switch (args->v0.format) { + case FERMI_A_ZBC_COLOR_V0_FMT_ZERO: + case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE: + case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32: + case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16: + case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16: + case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16: + case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16: + case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16: + case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8: + case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8: + case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10: + case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10: + case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8: + case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8: + case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8: + case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8: + case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8: + case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10: + case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11: + ret = nvc0_graph_zbc_color_get(priv, args->v0.format, + args->v0.ds, + args->v0.l2); + if (ret >= 0) { + args->v0.index = ret; + return 0; + } + break; + default: + return -EINVAL; + } + } + + return ret; +} + +static int +nvc0_fermi_mthd_zbc_depth(struct nouveau_object *object, void *data, u32 size) +{ + struct nvc0_graph_priv *priv = (void *)object->engine; + union { + struct fermi_a_zbc_depth_v0 v0; + } *args = data; + int ret; + + if (nvif_unpack(args->v0, 0, 0, false)) { + switch (args->v0.format) { + case FERMI_A_ZBC_DEPTH_V0_FMT_FP32: + ret = nvc0_graph_zbc_depth_get(priv, args->v0.format, + args->v0.ds, + args->v0.l2); + return (ret >= 0) ? 0 : -ENOSPC; + default: + return -EINVAL; + } + } + + return ret; +} + +static int +nvc0_fermi_mthd(struct nouveau_object *object, u32 mthd, void *data, u32 size) +{ + switch (mthd) { + case FERMI_A_ZBC_COLOR: + return nvc0_fermi_mthd_zbc_color(object, data, size); + case FERMI_A_ZBC_DEPTH: + return nvc0_fermi_mthd_zbc_depth(object, data, size); + default: + break; + } + return -EINVAL; +} + +struct nouveau_ofuncs +nvc0_fermi_ofuncs = { + .ctor = _nouveau_object_ctor, + .dtor = nouveau_object_destroy, + .init = nouveau_object_init, + .fini = nouveau_object_fini, + .mthd = nvc0_fermi_mthd, +}; + struct nouveau_oclass nvc0_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0x9039, &nouveau_object_ofuncs }, - { 0x9097, &nouveau_object_ofuncs }, + { FERMI_A, &nvc0_fermi_ofuncs }, { 0x90c0, &nouveau_object_ofuncs }, {} }; @@ -406,6 +591,35 @@ nvc0_graph_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +void +nvc0_graph_zbc_init(struct nvc0_graph_priv *priv) +{ + const u32 zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; + const u32 one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; + const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, + 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; + struct nouveau_ltc *ltc = nouveau_ltc(priv); + int index; + + if (!priv->zbc_color[0].format) { + nvc0_graph_zbc_color_get(priv, 1, & zero[0], &zero[4]); + nvc0_graph_zbc_color_get(priv, 2, & one[0], &one[4]); + nvc0_graph_zbc_color_get(priv, 4, &f32_0[0], &f32_0[4]); + nvc0_graph_zbc_color_get(priv, 4, &f32_1[0], &f32_1[4]); + nvc0_graph_zbc_depth_get(priv, 1, 0x00000000, 0x00000000); + nvc0_graph_zbc_depth_get(priv, 1, 0x3f800000, 0x3f800000); + } + + for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) + nvc0_graph_zbc_clear_color(priv, index); + for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) + nvc0_graph_zbc_clear_depth(priv, index); +} + void nvc0_graph_mmio(struct nvc0_graph_priv *priv, const struct nvc0_graph_pack *p) { @@ -1223,6 +1437,9 @@ nvc0_graph_init(struct nouveau_object *object) nv_wr32(priv, 0x400134, 0xffffffff); nv_wr32(priv, 0x400054, 0x34ce3464); + + nvc0_graph_zbc_init(priv); + return nvc0_graph_init_ctxctl(priv); } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h index a599b4fb5dd7..bfe9bdddbec8 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h @@ -30,11 +30,15 @@ #include #include +#include +#include + #include #include #include #include #include +#include #include #include @@ -69,6 +73,18 @@ struct nvc0_graph_fuc { u32 size; }; +struct nvc0_graph_zbc_color { + u32 format; + u32 ds[4]; + u32 l2[4]; +}; + +struct nvc0_graph_zbc_depth { + u32 format; + u32 ds; + u32 l2; +}; + struct nvc0_graph_priv { struct nouveau_graph base; @@ -78,6 +94,9 @@ struct nvc0_graph_priv { struct nvc0_graph_fuc fuc41ad; bool firmware; + struct nvc0_graph_zbc_color zbc_color[NOUVEAU_LTC_MAX_ZBC_CNT]; + struct nvc0_graph_zbc_depth zbc_depth[NOUVEAU_LTC_MAX_ZBC_CNT]; + u8 rop_nr; u8 gpc_nr; u8 tpc_nr[GPC_MAX]; @@ -119,11 +138,15 @@ int nvc0_graph_ctor(struct nouveau_object *, struct nouveau_object *, struct nouveau_object **); void nvc0_graph_dtor(struct nouveau_object *); int nvc0_graph_init(struct nouveau_object *); +void nvc0_graph_zbc_init(struct nvc0_graph_priv *); + int nve4_graph_fini(struct nouveau_object *, bool); int nve4_graph_init(struct nouveau_object *); int nvf0_graph_fini(struct nouveau_object *, bool); +extern struct nouveau_ofuncs nvc0_fermi_ofuncs; + extern struct nouveau_oclass nvc0_graph_sclass[]; extern struct nouveau_oclass nvc8_graph_sclass[]; extern struct nouveau_oclass nvf0_graph_sclass[]; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc1.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc1.c index 30cab0b2eba1..80096762e483 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc1.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc1.c @@ -33,9 +33,9 @@ static struct nouveau_oclass nvc1_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0x9039, &nouveau_object_ofuncs }, - { 0x9097, &nouveau_object_ofuncs }, + { FERMI_A, &nvc0_fermi_ofuncs }, + { FERMI_B, &nvc0_fermi_ofuncs }, { 0x90c0, &nouveau_object_ofuncs }, - { 0x9197, &nouveau_object_ofuncs }, {} }; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc8.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc8.c index a6bf783e1256..c944590838d4 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc8.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc8.c @@ -33,10 +33,10 @@ struct nouveau_oclass nvc8_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0x9039, &nouveau_object_ofuncs }, - { 0x9097, &nouveau_object_ofuncs }, + { FERMI_A, &nvc0_fermi_ofuncs }, + { FERMI_B, &nvc0_fermi_ofuncs }, + { FERMI_C, &nvc0_fermi_ofuncs }, { 0x90c0, &nouveau_object_ofuncs }, - { 0x9197, &nouveau_object_ofuncs }, - { 0x9297, &nouveau_object_ofuncs }, {} }; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c index fb9cb929320d..9ba01fbb2557 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c @@ -35,7 +35,7 @@ static struct nouveau_oclass nve4_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0xa040, &nouveau_object_ofuncs }, - { 0xa097, &nouveau_object_ofuncs }, + { KEPLER_A, &nvc0_fermi_ofuncs }, { 0xa0c0, &nouveau_object_ofuncs }, {} }; @@ -303,6 +303,9 @@ nve4_graph_init(struct nouveau_object *object) nv_wr32(priv, 0x400134, 0xffffffff); nv_wr32(priv, 0x400054, 0x34ce3464); + + nvc0_graph_zbc_init(priv); + return nvc0_graph_init_ctxctl(priv); } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvf0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvf0.c index 768c51f93910..b82b40a1dc11 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvf0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvf0.c @@ -33,7 +33,7 @@ struct nouveau_oclass nvf0_graph_sclass[] = { { 0x902d, &nouveau_object_ofuncs }, { 0xa140, &nouveau_object_ofuncs }, - { 0xa197, &nouveau_object_ofuncs }, + { KEPLER_B, &nvc0_fermi_ofuncs }, { 0xa1c0, &nouveau_object_ofuncs }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvif/class.h b/drivers/gpu/drm/nouveau/nvif/class.h index c9897f4003a9..73bf1269c72e 100644 --- a/drivers/gpu/drm/nouveau/nvif/class.h +++ b/drivers/gpu/drm/nouveau/nvif/class.h @@ -73,6 +73,16 @@ #define GF110_DISP_OVERLAY_CONTROL_DMA 0x0000907e #define GK104_DISP_OVERLAY_CONTROL_DMA 0x0000917e +#define FERMI_A 0x00009097 +#define FERMI_B 0x00009197 +#define FERMI_C 0x00009297 + +#define KEPLER_A 0x0000a097 +#define KEPLER_B 0x0000a197 +#define KEPLER_C 0x0000a297 + +#define MAXWELL_A 0x0000b097 + /******************************************************************************* * client @@ -491,4 +501,50 @@ struct nv50_disp_overlay_v0 { __u8 pad02[6]; }; + +/******************************************************************************* + * fermi + ******************************************************************************/ + +#define FERMI_A_ZBC_COLOR 0x00 +#define FERMI_A_ZBC_DEPTH 0x01 + +struct fermi_a_zbc_color_v0 { + __u8 version; +#define FERMI_A_ZBC_COLOR_V0_FMT_ZERO 0x01 +#define FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE 0x02 +#define FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32 0x04 +#define FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16 0x08 +#define FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16 0x0c +#define FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16 0x10 +#define FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16 0x14 +#define FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16 0x16 +#define FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8 0x18 +#define FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8 0x1c +#define FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10 0x20 +#define FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10 0x24 +#define FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8 0x28 +#define FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8 0x2c +#define FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8 0x30 +#define FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8 0x34 +#define FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8 0x38 +#define FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10 0x3c +#define FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11 0x40 + __u8 format; + __u8 index; + __u8 pad03[5]; + __u32 ds[4]; + __u32 l2[4]; +}; + +struct fermi_a_zbc_depth_v0 { + __u8 version; +#define FERMI_A_ZBC_DEPTH_V0_FMT_FP32 0x01 + __u8 format; + __u8 index; + __u8 pad03[5]; + __u32 ds; + __u32 l2; +}; + #endif -- cgit v1.2.3