diff options
Diffstat (limited to 'vendor/gioui.org/shader/piet/tile_alloc.comp')
-rw-r--r-- | vendor/gioui.org/shader/piet/tile_alloc.comp | 108 |
1 files changed, 108 insertions, 0 deletions
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Allocation and initialization of tiles for paths.
//
// Each invocation handles one annotated element: it converts the element's
// bounding box to tile coordinates, reserves a run of tiles for it out of a
// single per-workgroup allocation, writes the resulting Path record, and
// finally helps zero the freshly allocated tile memory.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Workgroup size, expressed as a power of two so the scan below can run
// exactly LG_TILE_ALLOC_WG passes.
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)

layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;

layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

#include "annotated.h"
#include "tile.h"

// Scale factors for converting pixel coordinates to tile coordinates.
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))

// Per-invocation tile counts; turned in place into an inclusive prefix sum.
shared uint sh_tile_count[TILE_ALLOC_WG];
// Allocation made by the last invocation on behalf of the whole workgroup.
shared Alloc sh_tile_alloc;
// Really a bool, but some Metal devices don't accept shared bools.
shared uint sh_tile_alloc_failed;

void main() {
    uint local_ix = gl_LocalInvocationID.x;
    uint elem_ix = gl_GlobalInvocationID.x;
    bool in_range = elem_ix < conf.n_elements;

    AnnotatedRef anno_ref = AnnotatedRef(conf.anno_alloc.offset + elem_ix * Annotated_size);
    PathRef out_ref = PathRef(conf.tile_alloc.offset + elem_ix * Path_size);

    // Invocations past the end of the element list behave like a Nop element.
    uint tag = Annotated_Nop;
    if (in_range) {
        tag = Annotated_tag(conf.anno_alloc, anno_ref).tag;
    }

    // Bounding box in tile units: min corner rounded down, max corner rounded up.
    ivec2 tile_min = ivec2(0);
    ivec2 tile_max = ivec2(0);
    if (tag == Annotated_Color || tag == Annotated_Image ||
        tag == Annotated_BeginClip || tag == Annotated_EndClip) {
        // Note: fills, strokes, and clips have compatible layout, so every
        // one of these tags is read through the EndClip accessor.
        AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, anno_ref);
        vec2 px_to_tile = vec2(SX, SY);
        tile_min = ivec2(floor(clip.bbox.xy * px_to_tile));
        tile_max = ivec2(ceil(clip.bbox.zw * px_to_tile));
    }

    // Restrict the box to the tile grid described by the config.
    ivec2 grid_dims = ivec2(int(conf.width_in_tiles), int(conf.height_in_tiles));
    tile_min = clamp(tile_min, ivec2(0), grid_dims);
    tile_max = clamp(tile_max, ivec2(0), grid_dims);

    Path path;
    path.bbox = uvec4(tile_min, tile_max);

    // Don't actually allocate tiles for an end clip, but we do want the
    // path structure (especially bbox) written for it.
    ivec2 extent = tile_max - tile_min;
    uint tile_count = 0;
    if (tag != Annotated_EndClip) {
        tile_count = uint(extent.x * extent.y);
    }

    // Inclusive prefix sum of the per-invocation tile counts. Each pass adds
    // the partial sum held `stride` slots to the left; the barriers separate
    // the reads of a pass from the writes that publish its result.
    sh_tile_count[local_ix] = tile_count;
    uint running_total = tile_count;
    for (uint pass = 0; pass < LG_TILE_ALLOC_WG; pass++) {
        uint stride = 1u << pass;
        barrier();
        if (local_ix >= stride) {
            running_total += sh_tile_count[local_ix - stride];
        }
        barrier();
        sh_tile_count[local_ix] = running_total;
    }

    // The last invocation holds the workgroup total and allocates for everyone.
    if (local_ix == TILE_ALLOC_WG - 1) {
        MallocResult res = malloc(running_total * Tile_size);
        sh_tile_alloc = res.alloc;
        if (res.failed) {
            sh_tile_alloc_failed = 1;
        } else {
            sh_tile_alloc_failed = 0;
        }
    }
    barrier();

    // Bail out when the allocation failed or an error was already recorded.
    bool alloc_ok = sh_tile_alloc_failed == 0 && mem_error == NO_ERROR;
    if (!alloc_ok) {
        return;
    }
    Alloc base = sh_tile_alloc;

    if (in_range) {
        // This element's tiles start where the preceding invocation's end.
        uint first_tile = 0;
        if (local_ix > 0) {
            first_tile = sh_tile_count[local_ix - 1];
        }
        Alloc tiles_alloc = slice_mem(base, Tile_size * first_tile, Tile_size * tile_count);
        path.tiles = TileRef(tiles_alloc.offset);
        Path_write(conf.tile_alloc, out_ref, path);
    }

    // Zero out the allocated tiles, addressed in 4-byte words. Invocations
    // write interleaved words instead of going through Tile_write; the
    // original author notes this is faster by a significant amount.
    uint words_to_clear = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint first_word = base.offset >> 2;
    uint word = local_ix;
    while (word < words_to_clear) {
        write_mem(base, first_word + word, 0);
        word += TILE_ALLOC_WG;
    }
}