// Source: vendor/gioui.org/shader/piet/tile_alloc.comp
// (blob 0b6eca495212267bb88736718768ef7549d3c514)
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Allocation and initialization of tiles for paths.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Base-2 logarithm of the workgroup size. LG_WG_FACTOR is not defined in this
// file (presumably it comes from one of the headers included above).
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
// Number of invocations per workgroup. It is also the length of the shared
// prefix-sum array, and main() runs LG_TILE_ALLOC_WG scan steps over it.
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)

// One-dimensional workgroup; main() handles one element per invocation.
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;

// Read-only configuration buffer. In this shader main() reads conf.n_elements,
// conf.anno_alloc, conf.tile_alloc, conf.width_in_tiles and
// conf.height_in_tiles. The Config type is declared outside this file.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

#include "annotated.h"
#include "tile.h"

// Scale factors for converting coordinates to tile indices: multiplying by
// SX / SY divides by the tile width / height.
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))

// Per-invocation tile counts; main() turns this in place into an inclusive
// prefix sum across the workgroup.
shared uint sh_tile_count[TILE_ALLOC_WG];
// Allocation obtained by the last invocation of the workgroup and shared with
// all other invocations after a barrier.
shared Alloc sh_tile_alloc;
// Really a bool, but some Metal devices don't accept shared bools.
// Non-zero when the workgroup's malloc reported failure.
shared uint sh_tile_alloc_failed;

// Entry point. Each invocation handles one element (index
// gl_GlobalInvocationID.x):
//   1. Read the element's annotation and convert its bbox to a clamped tile
//      rectangle.
//   2. Compute a workgroup-wide inclusive prefix sum of the per-element tile
//      counts.
//   3. Have the last invocation perform a single malloc for the whole
//      workgroup.
//   4. Write the element's Path record (bbox + reference to its tile slice).
//   5. Cooperatively zero the newly allocated tile memory.
// Invocations with element_ix >= conf.n_elements contribute zero tiles and
// write no Path, but still take part in every barrier and in the zeroing loop.
void main() {
    uint th_ix = gl_LocalInvocationID.x;
    uint element_ix = gl_GlobalInvocationID.x;
    // Path and annotation records are both indexed by element_ix within their
    // respective allocations.
    PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
    AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);

    // Out-of-range invocations keep the Nop tag and never read the annotation
    // buffer.
    uint tag = Annotated_Nop;
    if (element_ix < conf.n_elements) {
        tag = Annotated_tag(conf.anno_alloc, ref).tag;
    }
    // Tile-space bounding box; stays all-zero for tags not listed below.
    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    switch (tag) {
    case Annotated_Color:
    case Annotated_Image:
    case Annotated_BeginClip:
    case Annotated_EndClip:
        // Note: we take advantage of the fact that fills, strokes, and
        // clips have compatible layout.
        AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
        // floor the min corner and ceil the max corner so the tile rectangle
        // fully covers the bbox.
        x0 = int(floor(clip.bbox.x * SX));
        y0 = int(floor(clip.bbox.y * SY));
        x1 = int(ceil(clip.bbox.z * SX));
        y1 = int(ceil(clip.bbox.w * SY));
        break;
    }
    // Restrict the rectangle to the tile grid described by conf.
    x0 = clamp(x0, 0, int(conf.width_in_tiles));
    y0 = clamp(y0, 0, int(conf.height_in_tiles));
    x1 = clamp(x1, 0, int(conf.width_in_tiles));
    y1 = clamp(y1, 0, int(conf.height_in_tiles));

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    // NOTE(review): this assumes x1 >= x0 and y1 >= y0. If a bbox were
    // inverted on exactly one axis the signed product would be negative and
    // become a very large uint here -- confirm upstream stages never emit one.
    uint tile_count = (x1 - x0) * (y1 - y0);
    if (tag == Annotated_EndClip) {
        // Don't actually allocate tiles for an end clip, but we do want
        // the path structure (especially bbox) allocated for it.
        tile_count = 0;
    }

    sh_tile_count[th_ix] = tile_count;
    uint total_tile_count = tile_count;
    // Prefix sum of sh_tile_count
    // Each step adds the partial sum held by the invocation (1 << i) slots to
    // the left, so after LG_TILE_ALLOC_WG steps sh_tile_count[th_ix] holds the
    // inclusive sum of the counts of invocations 0..th_ix.
    for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
        // Wait until every invocation has stored its value from the previous
        // step (or the initial store above) before reading a neighbour's slot.
        barrier();
        if (th_ix >= (1 << i)) {
            total_tile_count += sh_tile_count[th_ix - (1 << i)];
        }
        // Wait until all reads of this step are done before overwriting.
        barrier();
        sh_tile_count[th_ix] = total_tile_count;
    }
    // The last invocation's inclusive sum is the total for the workgroup, so
    // it alone requests memory for all tiles at once.
    if (th_ix == TILE_ALLOC_WG - 1) {
        MallocResult res = malloc(total_tile_count * Tile_size);
        sh_tile_alloc = res.alloc;
        sh_tile_alloc_failed = res.failed ? 1 : 0;
    }
    // Publishes sh_tile_alloc / sh_tile_alloc_failed and the final
    // sh_tile_count values to the whole workgroup.
    barrier();
    // Bail out on allocation failure or on an error already recorded in
    // mem_error (declared outside this file). No barriers follow this point.
    if (sh_tile_alloc_failed != 0 || mem_error != NO_ERROR) {
        return;
    }
    Alloc alloc_start = sh_tile_alloc;

    if (element_ix < conf.n_elements) {
        // Exclusive prefix sum: index of this element's first tile within the
        // workgroup allocation is the previous invocation's inclusive sum.
        uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
        // presumably slice_mem returns the sub-range (offset, size) of
        // alloc_start -- verify against its definition.
        Alloc tiles_alloc = slice_mem(alloc_start, Tile_size * tile_subix, Tile_size * tile_count);
        path.tiles = TileRef(tiles_alloc.offset);
        Path_write(conf.tile_alloc, path_ref, path);
    }

    // Zero out allocated tiles efficiently
    // total_count is the number of write_mem slots to clear: tile total times
    // Tile_size / 4 (assumes Tile_size is in bytes and slots are 4-byte words,
    // consistent with the >> 2 below -- TODO confirm).
    uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint start_ix = alloc_start.offset >> 2;
    // Invocation th_ix clears slots th_ix, th_ix + TILE_ALLOC_WG, ... so that
    // neighbouring invocations write neighbouring slots.
    for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
        // Note: this interleaving is faster than using Tile_write
        // by a significant amount.
        write_mem(alloc_start, start_ix + i, 0);
    }
}