drm/gf100-/gr: add support for zero bandwidth clear
author Ben Skeggs <bskeggs@redhat.com>
Sat, 9 Aug 2014 18:10:29 +0000 (04:10 +1000)
committer Ben Skeggs <bskeggs@redhat.com>
Sat, 9 Aug 2014 19:28:13 +0000 (05:28 +1000)
Default ZBC table is compatible with binary driver defaults.

Userspace will need to be updated to take full advantage of this
feature; however, some applications will see a performance boost
even without updated drivers.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c
drivers/gpu/drm/nouveau/core/engine/graph/gm107.c
drivers/gpu/drm/nouveau/core/engine/graph/nv108.c
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
drivers/gpu/drm/nouveau/core/engine/graph/nvc1.c
drivers/gpu/drm/nouveau/core/engine/graph/nvc8.c
drivers/gpu/drm/nouveau/core/engine/graph/nve4.c
drivers/gpu/drm/nouveau/core/engine/graph/nvf0.c
drivers/gpu/drm/nouveau/nvif/class.h

index c569713..74a51fc 100644 (file)
@@ -27,7 +27,7 @@ static struct nouveau_oclass
 gk20a_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0xa040, &nouveau_object_ofuncs },
-       { 0xa297, &nouveau_object_ofuncs },
+       { KEPLER_C, &nvc0_fermi_ofuncs },
        { 0xa0c0, &nouveau_object_ofuncs },
        {}
 };
index 21c5f31..60d86f3 100644 (file)
@@ -36,7 +36,7 @@ static struct nouveau_oclass
 gm107_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0xa140, &nouveau_object_ofuncs },
-       { 0xb097, &nouveau_object_ofuncs },
+       { MAXWELL_A, &nvc0_fermi_ofuncs },
        { 0xb0c0, &nouveau_object_ofuncs },
        {}
 };
@@ -425,6 +425,9 @@ gm107_graph_init(struct nouveau_object *object)
        nv_wr32(priv, 0x400134, 0xffffffff);
 
        nv_wr32(priv, 0x400054, 0x2c350f63);
+
+       nvc0_graph_zbc_init(priv);
+
        return nvc0_graph_init_ctxctl(priv);
 }
 
index 00ea1a0..01e99fa 100644 (file)
@@ -33,7 +33,7 @@ static struct nouveau_oclass
 nv108_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0xa140, &nouveau_object_ofuncs },
-       { 0xa197, &nouveau_object_ofuncs },
+       { KEPLER_B, &nvc0_fermi_ofuncs },
        { 0xa1c0, &nouveau_object_ofuncs },
        {}
 };
index cda70fc..0156862 100644 (file)
 #include "nvc0.h"
 #include "ctxnvc0.h"
 
+/*******************************************************************************
+ * Zero Bandwidth Clear
+ ******************************************************************************/
+
+/* Write colour slot @zbc of the cached table (priv->zbc_color[]) to PGRAPH.
+ *
+ * The four DS words are only written when the slot has a non-zero format;
+ * the format and slot index are always written, followed by the trigger.
+ */
+static void
+nvc0_graph_zbc_clear_color(struct nvc0_graph_priv *priv, int zbc)
+{
+       const struct nvc0_graph_zbc_color *entry = &priv->zbc_color[zbc];
+       int i;
+
+       if (entry->format) {
+               /* DS words live in consecutive registers 0x405804..0x405810 */
+               for (i = 0; i < 4; i++)
+                       nv_wr32(priv, 0x405804 + (i * 4), entry->ds[i]);
+       }
+
+       nv_wr32(priv, 0x405814, entry->format);
+       nv_wr32(priv, 0x405820, zbc);
+       nv_wr32(priv, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
+}
+
+/* Look up, or allocate, a ZBC colour table entry.
+ *
+ * Slots ltc->zbc_min..ltc->zbc_max are scanned.  A slot whose format is zero
+ * is treated as free.  If a used slot already holds the same format and DS
+ * value it is reused; should its L2 value differ from @l2 the request is
+ * rejected.  Otherwise the first free slot is filled in, handed to the LTC
+ * via ltc->zbc_color_get(), and written to PGRAPH.
+ *
+ * Returns the slot index (>= 0) on success, -EINVAL on a DS/L2 mismatch with
+ * an existing entry, or -ENOSPC when no matching entry exists and the table
+ * has no free slot.
+ */
+static int
+nvc0_graph_zbc_color_get(struct nvc0_graph_priv *priv, int format,
+                        const u32 ds[4], const u32 l2[4])
+{
+       struct nouveau_ltc *ltc = nouveau_ltc(priv);
+       int zbc = -ENOSPC, i;
+
+       for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+               if (priv->zbc_color[i].format) {
+                       if (priv->zbc_color[i].format != format)
+                               continue;
+                       if (memcmp(priv->zbc_color[i].ds, ds, sizeof(
+                                  priv->zbc_color[i].ds)))
+                               continue;
+                       if (memcmp(priv->zbc_color[i].l2, l2, sizeof(
+                                  priv->zbc_color[i].l2))) {
+                               WARN_ON(1);
+                               return -EINVAL;
+                       }
+                       return i;
+               } else {
+                       /* remember only the first free slot */
+                       zbc = (zbc < 0) ? i : zbc;
+               }
+       }
+
+       /* Table full: zbc is still -ENOSPC and must not be used as an index. */
+       if (zbc < 0)
+               return zbc;
+
+       memcpy(priv->zbc_color[zbc].ds, ds, sizeof(priv->zbc_color[zbc].ds));
+       memcpy(priv->zbc_color[zbc].l2, l2, sizeof(priv->zbc_color[zbc].l2));
+       priv->zbc_color[zbc].format = format;
+       ltc->zbc_color_get(ltc, zbc, l2);
+       nvc0_graph_zbc_clear_color(priv, zbc);
+       return zbc;
+}
+
+/* Write depth slot @zbc of the cached table (priv->zbc_depth[]) to PGRAPH.
+ *
+ * The DS value is only written when the slot has a non-zero format; the
+ * format and slot index are always written, followed by the trigger.
+ */
+static void
+nvc0_graph_zbc_clear_depth(struct nvc0_graph_priv *priv, int zbc)
+{
+       const struct nvc0_graph_zbc_depth *entry = &priv->zbc_depth[zbc];
+
+       if (entry->format != 0)
+               nv_wr32(priv, 0x405818, entry->ds);
+
+       nv_wr32(priv, 0x40581c, entry->format);
+       nv_wr32(priv, 0x405820, zbc);
+       nv_wr32(priv, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
+}
+
+/* Look up, or allocate, a ZBC depth table entry.
+ *
+ * Slots ltc->zbc_min..ltc->zbc_max are scanned.  A slot whose format is zero
+ * is treated as free.  If a used slot already holds the same format and DS
+ * value it is reused; should its L2 value differ from @l2 the request is
+ * rejected.  Otherwise the first free slot is filled in, handed to the LTC
+ * via ltc->zbc_depth_get(), and written to PGRAPH.
+ *
+ * Returns the slot index (>= 0) on success, -EINVAL on a DS/L2 mismatch with
+ * an existing entry, or -ENOSPC when no matching entry exists and the table
+ * has no free slot.
+ */
+static int
+nvc0_graph_zbc_depth_get(struct nvc0_graph_priv *priv, int format,
+                        const u32 ds, const u32 l2)
+{
+       struct nouveau_ltc *ltc = nouveau_ltc(priv);
+       int zbc = -ENOSPC, i;
+
+       for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+               if (priv->zbc_depth[i].format) {
+                       if (priv->zbc_depth[i].format != format)
+                               continue;
+                       if (priv->zbc_depth[i].ds != ds)
+                               continue;
+                       if (priv->zbc_depth[i].l2 != l2) {
+                               WARN_ON(1);
+                               return -EINVAL;
+                       }
+                       return i;
+               } else {
+                       /* remember only the first free slot */
+                       zbc = (zbc < 0) ? i : zbc;
+               }
+       }
+
+       /* Table full: zbc is still -ENOSPC and must not be used as an index. */
+       if (zbc < 0)
+               return zbc;
+
+       priv->zbc_depth[zbc].format = format;
+       priv->zbc_depth[zbc].ds = ds;
+       priv->zbc_depth[zbc].l2 = l2;
+       ltc->zbc_depth_get(ltc, zbc, l2);
+       nvc0_graph_zbc_clear_depth(priv, zbc);
+       return zbc;
+}
+
 /*******************************************************************************
  * Graphics object classes
  ******************************************************************************/
 
+/* FERMI_A_ZBC_COLOR method: request a ZBC colour table entry.
+ *
+ * @data is interpreted as a struct fermi_a_zbc_color_v0.  The requested
+ * format must be one of the FERMI_A_ZBC_COLOR_V0_FMT_* values listed below,
+ * otherwise -EINVAL is returned.  On success the allocated (or matching)
+ * slot is stored in args->v0.index and 0 is returned; on failure the error
+ * from nvc0_graph_zbc_color_get() is returned.
+ *
+ * NOTE(review): 'ret' has no initialiser; nvif_unpack() is a macro from
+ * <nvif/unpack.h> that presumably assigns it (and uses the local names
+ * 'data' and 'size') - confirm against that header before renaming locals.
+ */
+static int
+nvc0_fermi_mthd_zbc_color(struct nouveau_object *object, void *data, u32 size)
+{
+       struct nvc0_graph_priv *priv = (void *)object->engine;
+       union {
+               struct fermi_a_zbc_color_v0 v0;
+       } *args = data;
+       int ret;
+
+       if (nvif_unpack(args->v0, 0, 0, false)) {
+               switch (args->v0.format) {
+               case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
+               case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
+               case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
+               case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
+               case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
+               case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
+               case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
+               case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
+               case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
+               case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
+               case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
+               case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
+               case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
+                       ret = nvc0_graph_zbc_color_get(priv, args->v0.format,
+                                                            args->v0.ds,
+                                                            args->v0.l2);
+                       /* a non-negative result is the table slot */
+                       if (ret >= 0) {
+                               args->v0.index = ret;
+                               return 0;
+                       }
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       return ret;
+}
+
+/* FERMI_A_ZBC_DEPTH method: request a ZBC depth table entry.
+ *
+ * @data is interpreted as a struct fermi_a_zbc_depth_v0.  Only
+ * FERMI_A_ZBC_DEPTH_V0_FMT_FP32 is accepted; any other format yields
+ * -EINVAL.  On success the allocated (or matching) slot is stored in
+ * args->v0.index, mirroring the colour method, and 0 is returned.  On
+ * failure the error from nvc0_graph_zbc_depth_get() is passed through
+ * unchanged rather than being collapsed to -ENOSPC.
+ *
+ * NOTE(review): 'ret' has no initialiser; nvif_unpack() is a macro from
+ * <nvif/unpack.h> that presumably assigns it (and uses the local names
+ * 'data' and 'size') - confirm against that header before renaming locals.
+ */
+static int
+nvc0_fermi_mthd_zbc_depth(struct nouveau_object *object, void *data, u32 size)
+{
+       struct nvc0_graph_priv *priv = (void *)object->engine;
+       union {
+               struct fermi_a_zbc_depth_v0 v0;
+       } *args = data;
+       int ret;
+
+       if (nvif_unpack(args->v0, 0, 0, false)) {
+               switch (args->v0.format) {
+               case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
+                       ret = nvc0_graph_zbc_depth_get(priv, args->v0.format,
+                                                            args->v0.ds,
+                                                            args->v0.l2);
+                       /* a non-negative result is the table slot */
+                       if (ret >= 0) {
+                               args->v0.index = ret;
+                               return 0;
+                       }
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       return ret;
+}
+
+/* Method dispatcher for the 3D object classes that use nvc0_fermi_ofuncs.
+ * Unknown method numbers are rejected with -EINVAL.
+ */
+static int
+nvc0_fermi_mthd(struct nouveau_object *object, u32 mthd, void *data, u32 size)
+{
+       if (mthd == FERMI_A_ZBC_COLOR)
+               return nvc0_fermi_mthd_zbc_color(object, data, size);
+
+       if (mthd == FERMI_A_ZBC_DEPTH)
+               return nvc0_fermi_mthd_zbc_depth(object, data, size);
+
+       return -EINVAL;
+}
+
+/* Object functions for the 3D classes (FERMI_*, KEPLER_*, MAXWELL_A in the
+ * sclass tables).  Constructor, destructor, init and fini are the generic
+ * nouveau object helpers; only .mthd is specific, routing the ZBC methods
+ * to nvc0_fermi_mthd().
+ */
+struct nouveau_ofuncs
+nvc0_fermi_ofuncs = {
+       .ctor = _nouveau_object_ctor,
+       .dtor = nouveau_object_destroy,
+       .init = nouveau_object_init,
+       .fini = nouveau_object_fini,
+       .mthd = nvc0_fermi_mthd,
+};
+
 struct nouveau_oclass
 nvc0_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0x9039, &nouveau_object_ofuncs },
-       { 0x9097, &nouveau_object_ofuncs },
+       { FERMI_A, &nvc0_fermi_ofuncs },
        { 0x90c0, &nouveau_object_ofuncs },
        {}
 };
@@ -406,6 +591,35 @@ nvc0_graph_pack_mmio[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+/* Program the PGRAPH ZBC tables from the driver's cached copy.
+ *
+ * If colour slot 0 has no format recorded, the default colour and depth
+ * entries are requested first (return values are not checked).  Afterwards
+ * every slot in ltc->zbc_min..ltc->zbc_max is written to the hardware:
+ * all colour slots first, then all depth slots.
+ */
+void
+nvc0_graph_zbc_init(struct nvc0_graph_priv *priv)
+{
+       /* Default colours, each stored as { ds[4], l2[4] }. */
+       static const u32  zero[2][4] = {
+               { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+               { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+       };
+       static const u32   one[2][4] = {
+               { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 },
+               { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
+       };
+       static const u32 f32_0[2][4] = {
+               { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+               { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+       };
+       static const u32 f32_1[2][4] = {
+               { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 },
+               { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 },
+       };
+       struct nouveau_ltc *ltc = nouveau_ltc(priv);
+       int slot;
+
+       if (!priv->zbc_color[0].format) {
+               nvc0_graph_zbc_color_get(priv, FERMI_A_ZBC_COLOR_V0_FMT_ZERO,
+                                        zero[0], zero[1]);
+               nvc0_graph_zbc_color_get(priv,
+                                        FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE,
+                                        one[0], one[1]);
+               nvc0_graph_zbc_color_get(priv,
+                               FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32,
+                                        f32_0[0], f32_0[1]);
+               nvc0_graph_zbc_color_get(priv,
+                               FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32,
+                                        f32_1[0], f32_1[1]);
+               nvc0_graph_zbc_depth_get(priv, FERMI_A_ZBC_DEPTH_V0_FMT_FP32,
+                                        0x00000000, 0x00000000);
+               nvc0_graph_zbc_depth_get(priv, FERMI_A_ZBC_DEPTH_V0_FMT_FP32,
+                                        0x3f800000, 0x3f800000);
+       }
+
+       /* colour slots are written before any depth slot */
+       for (slot = ltc->zbc_min; slot <= ltc->zbc_max; slot++)
+               nvc0_graph_zbc_clear_color(priv, slot);
+
+       for (slot = ltc->zbc_min; slot <= ltc->zbc_max; slot++)
+               nvc0_graph_zbc_clear_depth(priv, slot);
+}
+
 void
 nvc0_graph_mmio(struct nvc0_graph_priv *priv, const struct nvc0_graph_pack *p)
 {
@@ -1223,6 +1437,9 @@ nvc0_graph_init(struct nouveau_object *object)
        nv_wr32(priv, 0x400134, 0xffffffff);
 
        nv_wr32(priv, 0x400054, 0x34ce3464);
+
+       nvc0_graph_zbc_init(priv);
+
        return nvc0_graph_init_ctxctl(priv);
 }
 
index a599b4f..bfe9bdd 100644 (file)
 #include <core/gpuobj.h>
 #include <core/option.h>
 
+#include <nvif/unpack.h>
+#include <nvif/class.h>
+
 #include <subdev/fb.h>
 #include <subdev/vm.h>
 #include <subdev/bar.h>
 #include <subdev/timer.h>
 #include <subdev/mc.h>
+#include <subdev/ltc.h>
 
 #include <engine/fifo.h>
 #include <engine/graph.h>
@@ -69,6 +73,18 @@ struct nvc0_graph_fuc {
        u32  size;
 };
 
+/* Driver-side copy of one ZBC colour table slot. */
+struct nvc0_graph_zbc_color {
+       /* colour format; zero marks the slot as unused */
+       u32 format;
+       /* value written to the PGRAPH ZBC colour registers */
+       u32 ds[4];
+       /* value handed to the LTC via ltc->zbc_color_get() */
+       u32 l2[4];
+};
+
+/* Driver-side copy of one ZBC depth table slot. */
+struct nvc0_graph_zbc_depth {
+       /* depth format; zero marks the slot as unused */
+       u32 format;
+       /* value written to the PGRAPH ZBC depth register */
+       u32 ds;
+       /* value handed to the LTC via ltc->zbc_depth_get() */
+       u32 l2;
+};
+
 struct nvc0_graph_priv {
        struct nouveau_graph base;
 
@@ -78,6 +94,9 @@ struct nvc0_graph_priv {
        struct nvc0_graph_fuc fuc41ad;
        bool firmware;
 
+       struct nvc0_graph_zbc_color zbc_color[NOUVEAU_LTC_MAX_ZBC_CNT];
+       struct nvc0_graph_zbc_depth zbc_depth[NOUVEAU_LTC_MAX_ZBC_CNT];
+
        u8 rop_nr;
        u8 gpc_nr;
        u8 tpc_nr[GPC_MAX];
@@ -119,11 +138,15 @@ int  nvc0_graph_ctor(struct nouveau_object *, struct nouveau_object *,
                     struct nouveau_object **);
 void nvc0_graph_dtor(struct nouveau_object *);
 int  nvc0_graph_init(struct nouveau_object *);
+void nvc0_graph_zbc_init(struct nvc0_graph_priv *);
+
 int  nve4_graph_fini(struct nouveau_object *, bool);
 int  nve4_graph_init(struct nouveau_object *);
 
 int  nvf0_graph_fini(struct nouveau_object *, bool);
 
+extern struct nouveau_ofuncs nvc0_fermi_ofuncs;
+
 extern struct nouveau_oclass nvc0_graph_sclass[];
 extern struct nouveau_oclass nvc8_graph_sclass[];
 extern struct nouveau_oclass nvf0_graph_sclass[];
index 30cab0b..8009676 100644 (file)
@@ -33,9 +33,9 @@ static struct nouveau_oclass
 nvc1_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0x9039, &nouveau_object_ofuncs },
-       { 0x9097, &nouveau_object_ofuncs },
+       { FERMI_A, &nvc0_fermi_ofuncs },
+       { FERMI_B, &nvc0_fermi_ofuncs },
        { 0x90c0, &nouveau_object_ofuncs },
-       { 0x9197, &nouveau_object_ofuncs },
        {}
 };
 
index a6bf783..c944590 100644 (file)
@@ -33,10 +33,10 @@ struct nouveau_oclass
 nvc8_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0x9039, &nouveau_object_ofuncs },
-       { 0x9097, &nouveau_object_ofuncs },
+       { FERMI_A, &nvc0_fermi_ofuncs },
+       { FERMI_B, &nvc0_fermi_ofuncs },
+       { FERMI_C, &nvc0_fermi_ofuncs },
        { 0x90c0, &nouveau_object_ofuncs },
-       { 0x9197, &nouveau_object_ofuncs },
-       { 0x9297, &nouveau_object_ofuncs },
        {}
 };
 
index fb9cb92..9ba01fb 100644 (file)
@@ -35,7 +35,7 @@ static struct nouveau_oclass
 nve4_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0xa040, &nouveau_object_ofuncs },
-       { 0xa097, &nouveau_object_ofuncs },
+       { KEPLER_A, &nvc0_fermi_ofuncs },
        { 0xa0c0, &nouveau_object_ofuncs },
        {}
 };
@@ -303,6 +303,9 @@ nve4_graph_init(struct nouveau_object *object)
        nv_wr32(priv, 0x400134, 0xffffffff);
 
        nv_wr32(priv, 0x400054, 0x34ce3464);
+
+       nvc0_graph_zbc_init(priv);
+
        return nvc0_graph_init_ctxctl(priv);
 }
 
index 768c51f..b82b40a 100644 (file)
@@ -33,7 +33,7 @@ struct nouveau_oclass
 nvf0_graph_sclass[] = {
        { 0x902d, &nouveau_object_ofuncs },
        { 0xa140, &nouveau_object_ofuncs },
-       { 0xa197, &nouveau_object_ofuncs },
+       { KEPLER_B, &nvc0_fermi_ofuncs },
        { 0xa1c0, &nouveau_object_ofuncs },
        {}
 };
index c9897f4..73bf126 100644 (file)
 #define GF110_DISP_OVERLAY_CONTROL_DMA                               0x0000907e
 #define GK104_DISP_OVERLAY_CONTROL_DMA                               0x0000917e
 
+#define FERMI_A                                                      0x00009097
+#define FERMI_B                                                      0x00009197
+#define FERMI_C                                                      0x00009297
+
+#define KEPLER_A                                                     0x0000a097
+#define KEPLER_B                                                     0x0000a197
+#define KEPLER_C                                                     0x0000a297
+
+#define MAXWELL_A                                                    0x0000b097
+
 
 /*******************************************************************************
  * client
@@ -491,4 +501,50 @@ struct nv50_disp_overlay_v0 {
        __u8  pad02[6];
 };
 
+
+/*******************************************************************************
+ * fermi
+ ******************************************************************************/
+
+#define FERMI_A_ZBC_COLOR                                                  0x00
+#define FERMI_A_ZBC_DEPTH                                                  0x01
+
+/* Argument block for the FERMI_A_ZBC_COLOR method. */
+struct fermi_a_zbc_color_v0 {
+       /* NOTE(review): presumably must be 0 for this layout - confirm */
+       __u8  version;
+#define FERMI_A_ZBC_COLOR_V0_FMT_ZERO                                      0x01
+#define FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE                                 0x02
+#define FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32                       0x04
+#define FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16                           0x08
+#define FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16                       0x0c
+#define FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16                       0x10
+#define FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16                       0x14
+#define FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16                       0x16
+#define FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8                                  0x18
+#define FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8                               0x1c
+#define FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10                               0x20
+#define FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10                           0x24
+#define FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8                                  0x28
+#define FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8                               0x2c
+#define FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8                              0x30
+#define FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8                              0x34
+#define FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8                              0x38
+#define FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10                               0x3c
+#define FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11                              0x40
+       /* in: one of the FERMI_A_ZBC_COLOR_V0_FMT_* values above */
+       __u8  format;
+       /* out: table slot holding the entry, filled in on success */
+       __u8  index;
+       __u8  pad03[5];
+       /* in: clear colour as passed to the PGRAPH ZBC table */
+       __u32 ds[4];
+       /* in: clear colour as passed to the LTC ZBC table */
+       __u32 l2[4];
+};
+
+/* Argument block for the FERMI_A_ZBC_DEPTH method. */
+struct fermi_a_zbc_depth_v0 {
+       /* NOTE(review): presumably must be 0 for this layout - confirm */
+       __u8  version;
+#define FERMI_A_ZBC_DEPTH_V0_FMT_FP32                                      0x01
+       /* in: one of the FERMI_A_ZBC_DEPTH_V0_FMT_* values above */
+       __u8  format;
+       /* table slot; presumably an output mirroring fermi_a_zbc_color_v0,
+        * TODO confirm against the method handler
+        */
+       __u8  index;
+       __u8  pad03[5];
+       /* in: clear depth as passed to the PGRAPH ZBC table */
+       __u32 ds;
+       /* in: clear depth as passed to the LTC ZBC table */
+       __u32 l2;
+};
+
 #endif