aboutsummaryrefslogtreecommitdiff
path: root/vendor/gioui.org/shader/piet/path_coarse.comp
blob: ea525f5c6f11e1c21c611080111ef4d1092e4063 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Coarse rasterization of path segments.

// Allocation and initialization of tiles for paths.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

#define LG_COARSE_WG 5
#define COARSE_WG (1 << LG_COARSE_WG)

layout(local_size_x = COARSE_WG, local_size_y = 1) in;

layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

#include "pathseg.h"
#include "tile.h"

// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))

#define ACCURACY 0.25
#define Q_ACCURACY (ACCURACY * 0.1)
#define REM_ACCURACY (ACCURACY - Q_ACCURACY)
#define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
#define MAX_QUADS 16

vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) {
    float mt = 1.0 - t;
    return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t;
}

vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
    float mt = 1.0 - t;
    return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t;
}

struct SubdivResult {
    float val;
    float a0;
    float a2;
};

/// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$
///
/// This is used for flattening curves.
#define D 0.67
float approx_parabola_integral(float x) {
    return x * inversesqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x)));
}

/// An approximation to the inverse parabola integral.
#define B 0.39
float approx_parabola_inv_integral(float x) {
    return x * sqrt(1.0 - B + (B * B + 0.25 * x * x));
}

SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
    vec2 d01 = p1 - p0;
    vec2 d12 = p2 - p1;
    vec2 dd = d01 - d12;
    float cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x;
    float x0 = (d01.x * dd.x + d01.y * dd.y) / cross;
    float x2 = (d12.x * dd.x + d12.y * dd.y) / cross;
    float scale = abs(cross / (length(dd) * (x2 - x0)));

    float a0 = approx_parabola_integral(x0);
    float a2 = approx_parabola_integral(x2);
    float val = 0.0;
    if (scale < 1e9) {
        float da = abs(a2 - a0);
        float sqrt_scale = sqrt(scale);
        if (sign(x0) == sign(x2)) {
            val = da * sqrt_scale;
        } else {
            float xmin = sqrt_tol / sqrt_scale;
            val = sqrt_tol * da / approx_parabola_integral(xmin);
        }
    }
    return SubdivResult(val, a0, a2);
}

void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);

    PathSegTag tag = PathSegTag(PathSeg_Nop, 0);
    if (element_ix < conf.n_pathseg) {
        tag = PathSeg_tag(conf.pathseg_alloc, ref);
    }
    bool mem_ok = mem_error == NO_ERROR;
    switch (tag.tag) {
    case PathSeg_Cubic:
        PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);

        uint trans_ix = cubic.trans_ix;
        if (trans_ix > 0) {
            TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
            TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
            cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
            cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
            cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
            cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
        }

        vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
        float err = err_v.x * err_v.x + err_v.y * err_v.y;
        // The number of quadratics.
        uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
        n_quads = min(n_quads, MAX_QUADS);
        SubdivResult keep_params[MAX_QUADS];
        // Iterate over quadratics and tote up the estimated number of segments.
        float val = 0.0;
        vec2 qp0 = cubic.p0;
        float step = 1.0 / float(n_quads);
        for (uint i = 0; i < n_quads; i++) {
            float t = float(i + 1) * step;
            vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
            vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY));
            keep_params[i] = params;
            val += params.val;

            qp0 = qp2;
        }
        uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);

        bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
        uint path_ix = cubic.path_ix;
        Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
        Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
        ivec4 bbox = ivec4(path.bbox);
        vec2 p0 = cubic.p0;
        qp0 = cubic.p0;
        float v_step = val / float(n);
        int n_out = 1;
        float val_sum = 0.0;
        for (uint i = 0; i < n_quads; i++) {
            float t = float(i + 1) * step;
            vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
            vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            SubdivResult params = keep_params[i];
            float u0 = approx_parabola_inv_integral(params.a0);
            float u2 = approx_parabola_inv_integral(params.a2);
            float uscale = 1.0 / (u2 - u0);
            float target = float(n_out) * v_step;
            while (n_out == n || target < val_sum + params.val) {
                vec2 p1;
                if (n_out == n) {
                    p1 = cubic.p3;
                } else {
                    float u = (target - val_sum) / params.val;
                    float a = mix(params.a0, params.a2, u);
                    float au = approx_parabola_inv_integral(a);
                    float t = (au - u0) * uscale;
                    p1 = eval_quad(qp0, qp1, qp2, t);
                }

                // Output line segment

                // Bounding box of element in pixel coordinates.
                float xmin = min(p0.x, p1.x) - cubic.stroke.x;
                float xmax = max(p0.x, p1.x) + cubic.stroke.x;
                float ymin = min(p0.y, p1.y) - cubic.stroke.y;
                float ymax = max(p0.y, p1.y) + cubic.stroke.y;
                float dx = p1.x - p0.x;
                float dy = p1.y - p0.y;
                // Set up for per-scanline coverage formula, below.
                float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
                float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
                float b = invslope; // Note: assumes square tiles, otherwise scale.
                float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;

                int x0 = int(floor(xmin * SX));
                int x1 = int(floor(xmax * SX) + 1);
                int y0 = int(floor(ymin * SY));
                int y1 = int(floor(ymax * SY) + 1);

                x0 = clamp(x0, bbox.x, bbox.z);
                y0 = clamp(y0, bbox.y, bbox.w);
                x1 = clamp(x1, bbox.x, bbox.z);
                y1 = clamp(y1, bbox.y, bbox.w);
                float xc = a + b * float(y0);
                int stride = bbox.z - bbox.x;
                int base = (y0 - bbox.y) * stride - bbox.x;
                // TODO: can be tighter, use c to bound width
                uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
                // Consider using subgroups to aggregate atomic add.
                MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
                if (tile_alloc.failed || !mem_ok) {
                    return;
                }
                uint tile_offset = tile_alloc.alloc.offset;

                TileSeg tile_seg;

                int xray = int(floor(p0.x*SX));
                int last_xray = int(floor(p1.x*SX));
                if (p0.y > p1.y) {
                    int tmp = xray;
                    xray = last_xray;
                    last_xray = tmp;
                }
                for (int y = y0; y < y1; y++) {
                    float tile_y0 = float(y * TILE_HEIGHT_PX);
                    int xbackdrop = max(xray + 1, bbox.x);
                    if (!is_stroke && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) {
                        int backdrop = p1.y < p0.y ? 1 : -1;
                        TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
                        uint tile_el = tile_ref.offset >> 2;
                        if (touch_mem(path_alloc, tile_el + 1)) {
                            atomicAdd(memory[tile_el + 1], backdrop);
                        }
                    }

                    // next_xray is the xray for the next scanline; the line segment intersects
                    // all tiles between xray and next_xray.
                    int next_xray = last_xray;
                    if (y < y1 - 1) {
                        float tile_y1 = float((y + 1) * TILE_HEIGHT_PX);
                        float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
                        next_xray = int(floor(x_edge*SX));
                    }

                    int min_xray = min(xray, next_xray);
                    int max_xray = max(xray, next_xray);
                    int xx0 = min(int(floor(xc - c)), min_xray);
                    int xx1 = max(int(ceil(xc + c)), max_xray + 1);
                    xx0 = clamp(xx0, x0, x1);
                    xx1 = clamp(xx1, x0, x1);

                    for (int x = xx0; x < xx1; x++) {
                        float tile_x0 = float(x * TILE_WIDTH_PX);
                        TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
                        uint tile_el = tile_ref.offset >> 2;
                        uint old = 0;
                        if (touch_mem(path_alloc, tile_el)) {
                            old = atomicExchange(memory[tile_el], tile_offset);
                        }
                        tile_seg.origin = p0;
                        tile_seg.vector = p1 - p0;
                        float y_edge = 0.0;
                        if (!is_stroke) {
                            y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
                            if (min(p0.x, p1.x) < tile_x0) {
                                vec2 p = vec2(tile_x0, y_edge);
                                if (p0.x > p1.x) {
                                    tile_seg.vector = p - p0;
                                } else {
                                    tile_seg.origin = p;
                                    tile_seg.vector = p1 - p;
                                }
                                // kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
                                // Nudge zeroes towards the intended sign.
                                if (tile_seg.vector.x == 0) {
                                    tile_seg.vector.x = sign(p1.x - p0.x)*1e-9;
                                }
                            }
                            if (x <= min_xray || max_xray < x) {
                                // Reject inconsistent intersections.
                                y_edge = 1e9;
                            }
                        }
                        tile_seg.y_edge = y_edge;
                        tile_seg.next.offset = old;
                        TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
                        tile_offset += TileSeg_size;
                    }
                    xc += b;
                    base += stride;
                    xray = next_xray;
                }

                n_out += 1;
                target += v_step;
                p0 = p1;
            }
            val_sum += params.val;

            qp0 = qp2;
        }

        break;
    }
}