vendor/gioui.org/shader/piet/kernel4.comp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
// in the per-tile command list to an image.

// Right now, this kernel stores the image in a buffer, but a better
// plan is to use a texture. This is because of limited support.

#version 450
#extension GL_GOOGLE_include_directive : enable
#ifdef ENABLE_IMAGE_INDICES
#extension GL_EXT_nonuniform_qualifier : enable
#endif

#include "mem.h"
#include "setup.h"

#define CHUNK_X 2
#define CHUNK_Y 4
#define CHUNK CHUNK_X * CHUNK_Y
#define CHUNK_DX (TILE_WIDTH_PX / CHUNK_X)
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;

layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
    Config conf;
};

layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;

#ifdef ENABLE_IMAGE_INDICES
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[];
#else
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images;
#endif

#include "ptcl.h"
#include "tile.h"

mediump vec3 tosRGB(mediump vec3 rgb) {
    bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
    mediump vec3 below = vec3(12.92)*rgb;
    mediump vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
    return mix(below, above, cutoff);
}

mediump vec3 fromsRGB(mediump vec3 srgb) {
    // Formula from EXT_sRGB.
    bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045));
    mediump vec3 below = srgb/vec3(12.92);
    mediump vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
    return mix(below, above, cutoff);
}

// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
// space.
mediump vec4 unpacksRGB(uint srgba) {
    mediump vec4 color = unpackUnorm4x8(srgba).wzyx;
    return vec4(fromsRGB(color.rgb), color.a);
}

// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
uint packsRGB(mediump vec4 rgba) {
    rgba = vec4(tosRGB(rgba.rgb), rgba.a);
    return packUnorm4x8(rgba.wzyx);
}

uvec2 chunk_offset(uint i) {
    return uvec2(i % CHUNK_X * CHUNK_DX, i / CHUNK_X * CHUNK_DY);
}

mediump vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
    mediump vec4 rgba[CHUNK];
    for (uint i = 0; i < CHUNK; i++) {
        ivec2 uv = ivec2(xy + chunk_offset(i)) + cmd_img.offset;
        mediump vec4 fg_rgba;
#ifdef ENABLE_IMAGE_INDICES
        fg_rgba = imageLoad(images[cmd_img.index], uv);
#else
        fg_rgba = imageLoad(images, uv);
#endif
        fg_rgba.rgb = fromsRGB(fg_rgba.rgb);
        rgba[i] = fg_rgba;
    }
    return rgba;
}

void main() {
    uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
    Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
    CmdRef cmd_ref = CmdRef(cmd_alloc.offset);

    // Read scrach space allocation, written first in the command list.
    Alloc scratch_alloc = alloc_read(cmd_alloc, cmd_ref.offset);
    cmd_ref.offset += Alloc_size;

    uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
    vec2 xy = vec2(xy_uint);
    mediump vec4 rgba[CHUNK];
    for (uint i = 0; i < CHUNK; i++) {
        rgba[i] = vec4(0.0);
        // TODO: remove this debug image support when the actual image method is plumbed.
#ifdef DEBUG_IMAGES
#ifdef ENABLE_IMAGE_INDICES
        if (xy_uint.x < 1024 && xy_uint.y < 1024) {
            rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4);
        }
#else
        if (xy_uint.x < 1024 && xy_uint.y < 1024) {
            rgb[i] = imageLoad(images[0], ivec2(xy_uint + chunk_offset(i))/4).rgb;
        }
#endif
#endif
    }

    mediump float area[CHUNK];
    uint clip_depth = 0;
    bool mem_ok = mem_error == NO_ERROR;
    while (mem_ok) {
        uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
        if (tag == Cmd_End) {
            break;
        }
        switch (tag) {
        case Cmd_Stroke:
            // Calculate distance field from all the line segments in this tile.
            CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
            mediump float df[CHUNK];
            for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
            TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
            do {
                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                vec2 line_vec = seg.vector;
                for (uint k = 0; k < CHUNK; k++) {
                    vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
                    dpos += vec2(chunk_offset(k));
                    float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
                    df[k] = min(df[k], length(line_vec * t - dpos));
                }
                tile_seg_ref = seg.next;
            } while (tile_seg_ref.offset != 0);
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
            }
            cmd_ref.offset += 4 + CmdStroke_size;
            break;
        case Cmd_Fill:
            CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref);
            for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
            tile_seg_ref = TileSegRef(fill.tile_ref);
            // Calculate coverage based on backdrop + coverage of each line segment
            do {
                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                for (uint k = 0; k < CHUNK; k++) {
                    vec2 my_xy = xy + vec2(chunk_offset(k));
                    vec2 start = seg.origin - my_xy;
                    vec2 end = start + seg.vector;
                    vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
                    if (window.x != window.y) {
                        vec2 t = (window - start.y) / seg.vector.y;
                        vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
                        float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
                        float xmax = max(xs.x, xs.y);
                        float b = min(xmax, 1.0);
                        float c = max(b, 0.0);
                        float d = max(xmin, 0.0);
                        float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
                        area[k] += a * (window.x - window.y);
                    }
                    area[k] += sign(seg.vector.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
                }
                tile_seg_ref = seg.next;
            } while (tile_seg_ref.offset != 0);
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = min(abs(area[k]), 1.0);
            }
            cmd_ref.offset += 4 + CmdFill_size;
            break;
        case Cmd_Solid:
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = 1.0;
            }
            cmd_ref.offset += 4;
            break;
        case Cmd_Alpha:
            CmdAlpha alpha = Cmd_Alpha_read(cmd_alloc, cmd_ref);
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = alpha.alpha;
            }
            cmd_ref.offset += 4 + CmdAlpha_size;
            break;
        case Cmd_Color:
            CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
            mediump vec4 fg = unpacksRGB(color.rgba_color);
            for (uint k = 0; k < CHUNK; k++) {
                mediump vec4 fg_k = fg * area[k];
                rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
            }
            cmd_ref.offset += 4 + CmdColor_size;
            break;
        case Cmd_Image:
            CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
            mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
            for (uint k = 0; k < CHUNK; k++) {
                mediump vec4 fg_k = img[k] * area[k];
                rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
            }
            cmd_ref.offset += 4 + CmdImage_size;
            break;
        case Cmd_BeginClip:
            uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
            for (uint k = 0; k < CHUNK; k++) {
                uvec2 offset = chunk_offset(k);
                uint srgb = packsRGB(vec4(rgba[k]));
                mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
                write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
                write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
                rgba[k] = vec4(0.0);
            }
            clip_depth++;
            cmd_ref.offset += 4;
            break;
        case Cmd_EndClip:
            clip_depth--;
            base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
            for (uint k = 0; k < CHUNK; k++) {
                uvec2 offset = chunk_offset(k);
                uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
                uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
                mediump vec4 bg = unpacksRGB(srgb);
                mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
                rgba[k] = bg * (1.0 - fg.a) + fg;
            }
            cmd_ref.offset += 4;
            break;
        case Cmd_Jump:
            cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
            cmd_alloc.offset = cmd_ref.offset;
            break;
        }
    }

    for (uint i = 0; i < CHUNK; i++) {
        imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
    }
}