aboutsummaryrefslogtreecommitdiff
path: root/vendor/gioui.org/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gioui.org/cpu')
-rw-r--r--vendor/gioui.org/cpu/LICENSE63
-rw-r--r--vendor/gioui.org/cpu/README.md25
-rw-r--r--vendor/gioui.org/cpu/abi.h91
-rw-r--r--vendor/gioui.org/cpu/driver.go86
-rw-r--r--vendor/gioui.org/cpu/driver_nosupport.go64
-rw-r--r--vendor/gioui.org/cpu/embed.go11
-rw-r--r--vendor/gioui.org/cpu/go.mod3
-rw-r--r--vendor/gioui.org/cpu/go.sum0
-rw-r--r--vendor/gioui.org/cpu/init.sh23
-rw-r--r--vendor/gioui.org/cpu/runtime.c245
-rw-r--r--vendor/gioui.org/cpu/runtime.h45
11 files changed, 656 insertions, 0 deletions
diff --git a/vendor/gioui.org/cpu/LICENSE b/vendor/gioui.org/cpu/LICENSE
new file mode 100644
index 0000000..81f4733
--- /dev/null
+++ b/vendor/gioui.org/cpu/LICENSE
@@ -0,0 +1,63 @@
+This project is provided under the terms of the UNLICENSE or
+the MIT license denoted by the following SPDX identifier:
+
+SPDX-License-Identifier: Unlicense OR MIT
+
+You may use the project under the terms of either license.
+
+Both licenses are reproduced below.
+
+----
+The MIT License (MIT)
+
+Copyright (c) 2019 The Gio authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+---
+
+
+
+---
+The UNLICENSE
+
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org/>
+---
diff --git a/vendor/gioui.org/cpu/README.md b/vendor/gioui.org/cpu/README.md
new file mode 100644
index 0000000..4244fe0
--- /dev/null
+++ b/vendor/gioui.org/cpu/README.md
@@ -0,0 +1,25 @@
+# Compile and run compute programs on CPU
+
+This projects contains the compiler for turning Vulkan SPIR-V compute shaders
+into binaries for arm64, arm or amd64, using
+[SwiftShader](https://github.com/eliasnaur/swiftshader) with a few
+modifications. A runtime implemented in C and Go is available for running the
+resulting binaries.
+
+The primary use is to support a CPU-based rendering fallback for
+[Gio](https://gioui.org). In particular, the `gioui.org/shader/piet` package
+contains arm, arm64, amd64 binaries for
+[piet-gpu](https://github.com/linebender/piet-gpu).
+
+# Compiling and running shaders
+
+The `init.sh` script clones the modifed SwiftShader projects and builds it for
+64-bit and 32-bit. SwiftShader is not designed to cross-compile which is why a
+32-bit build is needed to compile shaders for arm.
+
+The `example/run.sh` script demonstrates compiling and running a simple compute
+program.
+
+## Issues and contributions
+
+See the [Gio contribution guide](https://gioui.org/doc/contribute).
diff --git a/vendor/gioui.org/cpu/abi.h b/vendor/gioui.org/cpu/abi.h
new file mode 100644
index 0000000..365d936
--- /dev/null
+++ b/vendor/gioui.org/cpu/abi.h
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+#define ALIGN(bytes, type) type __attribute__((aligned(bytes)))
+
+typedef ALIGN(8, uint8_t) byte8[8];
+typedef ALIGN(8, uint16_t) word4[4];
+typedef ALIGN(4, uint32_t) dword;
+typedef ALIGN(16, uint32_t) dword4[4];
+typedef ALIGN(8, uint64_t) qword;
+typedef ALIGN(16, uint64_t) qword2[2];
+typedef ALIGN(16, unsigned int) uint4[4];
+typedef ALIGN(8, uint32_t) dword2[2];
+typedef ALIGN(8, unsigned short) ushort4[4];
+typedef ALIGN(16, float) float4[4];
+typedef ALIGN(16, int) int4[4];
+
+typedef unsigned short half;
+
+typedef unsigned char bool;
+
+enum {
+ MAX_BOUND_DESCRIPTOR_SETS = 4,
+ MAX_DESCRIPTOR_SET_UNIFORM_BUFFERS_DYNAMIC = 8,
+ MAX_DESCRIPTOR_SET_STORAGE_BUFFERS_DYNAMIC = 4,
+ MAX_DESCRIPTOR_SET_COMBINED_BUFFERS_DYNAMIC =
+ MAX_DESCRIPTOR_SET_UNIFORM_BUFFERS_DYNAMIC +
+ MAX_DESCRIPTOR_SET_STORAGE_BUFFERS_DYNAMIC,
+ MAX_PUSH_CONSTANT_SIZE = 128,
+
+ MIN_STORAGE_BUFFER_OFFSET_ALIGNMENT = 256,
+
+ REQUIRED_MEMORY_ALIGNMENT = 16,
+
+ SIMD_WIDTH = 4,
+};
+
+struct image_descriptor {
+ ALIGN(16, void *ptr);
+ int width;
+ int height;
+ int depth;
+ int row_pitch_bytes;
+ int slice_pitch_bytes;
+ int sample_pitch_bytes;
+ int sample_count;
+ int size_in_bytes;
+
+ void *stencil_ptr;
+ int stencil_row_pitch_bytes;
+ int stencil_slice_pitch_bytes;
+ int stencil_sample_pitch_bytes;
+
+ // TODO: unused?
+ void *memoryOwner;
+};
+
+struct buffer_descriptor {
+ ALIGN(16, void *ptr);
+ int size_in_bytes;
+ int robustness_size;
+};
+
+struct program_data {
+ uint8_t *descriptor_sets[MAX_BOUND_DESCRIPTOR_SETS];
+ uint32_t descriptor_dynamic_offsets[MAX_DESCRIPTOR_SET_COMBINED_BUFFERS_DYNAMIC];
+ uint4 num_workgroups;
+ uint4 workgroup_size;
+ uint32_t invocations_per_subgroup;
+ uint32_t subgroups_per_workgroup;
+ uint32_t invocations_per_workgroup;
+ unsigned char push_constants[MAX_PUSH_CONSTANT_SIZE];
+ // Unused.
+ void *constants;
+};
+
+typedef int32_t yield_result;
+
+typedef void * coroutine;
+
+typedef coroutine (*routine_begin)(struct program_data *data,
+ int32_t workgroupX,
+ int32_t workgroupY,
+ int32_t workgroupZ,
+ void *workgroupMemory,
+ int32_t firstSubgroup,
+ int32_t subgroupCount);
+
+typedef bool (*routine_await)(coroutine r, yield_result *res);
+
+typedef void (*routine_destroy)(coroutine r);
+
diff --git a/vendor/gioui.org/cpu/driver.go b/vendor/gioui.org/cpu/driver.go
new file mode 100644
index 0000000..d156e2b
--- /dev/null
+++ b/vendor/gioui.org/cpu/driver.go
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+//go:build linux && (arm64 || arm || amd64)
+// +build linux
+// +build arm64 arm amd64
+
+package cpu
+
+/*
+#cgo CFLAGS: -std=c11 -D_POSIX_C_SOURCE=200112L
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "abi.h"
+#include "runtime.h"
+*/
+import "C"
+import (
+ "unsafe"
+)
+
+type (
+ BufferDescriptor = C.struct_buffer_descriptor
+ ImageDescriptor = C.struct_image_descriptor
+ SamplerDescriptor = C.struct_sampler_descriptor
+
+ DispatchContext = C.struct_dispatch_context
+ ThreadContext = C.struct_thread_context
+ ProgramInfo = C.struct_program_info
+)
+
+const Supported = true
+
+func NewBuffer(size int) BufferDescriptor {
+ return C.alloc_buffer(C.size_t(size))
+}
+
+func (d *BufferDescriptor) Data() []byte {
+ return (*(*[1 << 30]byte)(d.ptr))[:d.size_in_bytes:d.size_in_bytes]
+}
+
+func (d *BufferDescriptor) Free() {
+ if d.ptr != nil {
+ C.free(d.ptr)
+ }
+ *d = BufferDescriptor{}
+}
+
+func NewImageRGBA(width, height int) ImageDescriptor {
+ return C.alloc_image_rgba(C.int(width), C.int(height))
+}
+
+func (d *ImageDescriptor) Data() []byte {
+ return (*(*[1 << 30]byte)(d.ptr))[:d.size_in_bytes:d.size_in_bytes]
+}
+
+func (d *ImageDescriptor) Free() {
+ if d.ptr != nil {
+ C.free(d.ptr)
+ }
+ *d = ImageDescriptor{}
+}
+
+func NewDispatchContext() *DispatchContext {
+ return C.alloc_dispatch_context()
+}
+
+func (c *DispatchContext) Free() {
+ C.free_dispatch_context(c)
+}
+
+func (c *DispatchContext) Prepare(numThreads int, prog *ProgramInfo, descSet unsafe.Pointer, x, y, z int) {
+ C.prepare_dispatch(c, C.int(numThreads), prog, (*C.uint8_t)(descSet), C.int(x), C.int(y), C.int(z))
+}
+
+func (c *DispatchContext) Dispatch(threadIdx int, ctx *ThreadContext) {
+ C.dispatch_thread(c, C.int(threadIdx), ctx)
+}
+
+func NewThreadContext() *ThreadContext {
+ return C.alloc_thread_context()
+}
+
+func (c *ThreadContext) Free() {
+ C.free_thread_context(c)
+}
diff --git a/vendor/gioui.org/cpu/driver_nosupport.go b/vendor/gioui.org/cpu/driver_nosupport.go
new file mode 100644
index 0000000..3a118f2
--- /dev/null
+++ b/vendor/gioui.org/cpu/driver_nosupport.go
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+//go:build !(linux && (arm64 || arm || amd64))
+// +build !linux !arm64,!arm,!amd64
+
+package cpu
+
+import "unsafe"
+
+type (
+ BufferDescriptor struct{}
+ ImageDescriptor struct{}
+ SamplerDescriptor struct{}
+
+ DispatchContext struct{}
+ ThreadContext struct{}
+ ProgramInfo struct{}
+)
+
+const Supported = false
+
+func NewBuffer(size int) BufferDescriptor {
+ panic("unsupported")
+}
+
+func (d *BufferDescriptor) Data() []byte {
+ panic("unsupported")
+}
+
+func (d *BufferDescriptor) Free() {
+}
+
+func NewImageRGBA(width, height int) ImageDescriptor {
+ panic("unsupported")
+}
+
+func (d *ImageDescriptor) Data() []byte {
+ panic("unsupported")
+}
+
+func (d *ImageDescriptor) Free() {
+}
+
+func NewDispatchContext() *DispatchContext {
+ panic("unsupported")
+}
+
+func (c *DispatchContext) Free() {
+}
+
+func (c *DispatchContext) Prepare(numThreads int, prog *ProgramInfo, descSet unsafe.Pointer, x, y, z int) {
+ panic("unsupported")
+}
+
+func (c *DispatchContext) Dispatch(threadIdx int, ctx *ThreadContext) {
+ panic("unsupported")
+}
+
+func NewThreadContext() *ThreadContext {
+ panic("unsupported")
+}
+
+func (c *ThreadContext) Free() {
+}
diff --git a/vendor/gioui.org/cpu/embed.go b/vendor/gioui.org/cpu/embed.go
new file mode 100644
index 0000000..9d3b944
--- /dev/null
+++ b/vendor/gioui.org/cpu/embed.go
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+package cpu
+
+import _ "embed"
+
+//go:embed abi.h
+var ABIH []byte
+
+//go:embed runtime.h
+var RuntimeH []byte
diff --git a/vendor/gioui.org/cpu/go.mod b/vendor/gioui.org/cpu/go.mod
new file mode 100644
index 0000000..46709a0
--- /dev/null
+++ b/vendor/gioui.org/cpu/go.mod
@@ -0,0 +1,3 @@
+module gioui.org/cpu
+
+go 1.17
diff --git a/vendor/gioui.org/cpu/go.sum b/vendor/gioui.org/cpu/go.sum
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/vendor/gioui.org/cpu/go.sum
diff --git a/vendor/gioui.org/cpu/init.sh b/vendor/gioui.org/cpu/init.sh
new file mode 100644
index 0000000..f0f0a9c
--- /dev/null
+++ b/vendor/gioui.org/cpu/init.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# SPDX-License-Identifier: Unlicense OR MIT
+
+set -e
+
+cd ~/.cache
+git clone https://github.com/eliasnaur/swiftshader
+cd swiftshader
+
+# 32-bit build
+cp -a build build.32bit
+cd build.32bit
+CXX=clang++ CC=clang CFLAGS=-m32 CXXFLAGS=-m32 cmake -DREACTOR_EMIT_ASM_FILE=true -DSWIFTSHADER_BUILD_PVR=false -DSWIFTSHADER_BUILD_TESTS=false -DSWIFTSHADER_BUILD_GLESv2=false -DSWIFTSHADER_BUILD_EGL=false -DSWIFTSHADER_BUILD_ANGLE=false ..
+cmake --build . --parallel 4
+cd ..
+
+# 64-bit build
+cp -a build build.64bit
+cd build.64bit
+CXX=clang++ CC=clang cmake -DREACTOR_EMIT_ASM_FILE=true -DSWIFTSHADER_BUILD_PVR=false -DSWIFTSHADER_BUILD_TESTS=false -DSWIFTSHADER_BUILD_GLESv2=false -DSWIFTSHADER_BUILD_EGL=false -DSWIFTSHADER_BUILD_ANGLE=false ..
+cmake --build . --parallel 4
+cd ..
diff --git a/vendor/gioui.org/cpu/runtime.c b/vendor/gioui.org/cpu/runtime.c
new file mode 100644
index 0000000..f7f6108
--- /dev/null
+++ b/vendor/gioui.org/cpu/runtime.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+//go:build linux && (arm64 || arm || amd64)
+// +build linux
+// +build arm64 arm amd64
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "abi.h"
+#include "runtime.h"
+#include "_cgo_export.h"
+
+// coroutines is a FIFO queue of coroutines implemented as a circular
+// buffer.
+struct coroutines {
+ coroutine *routines;
+ // start and end indexes into routines.
+ unsigned int start;
+ unsigned int end;
+ // cap is the capacity of routines.
+ unsigned int cap;
+};
+
+struct dispatch_context {
+ // descriptor_set is the aligned storage for the descriptor set.
+ void *descriptor_set;
+ int desc_set_size;
+
+ int nthreads;
+ bool has_cbarriers;
+ size_t memory_size;
+ // Program entrypoints.
+ routine_begin begin;
+ routine_await await;
+ routine_destroy destroy;
+
+ struct program_data data;
+};
+
+struct thread_context {
+ struct coroutines routines;
+ size_t memory_size;
+ uint8_t *memory;
+};
+
+static void *malloc_align(size_t alignment, size_t size) {
+ void *ptr;
+ int ret = posix_memalign(&ptr, alignment, size);
+ assert(ret == 0);
+ return ptr;
+}
+
+static void coroutines_dump(struct coroutines *routines) {
+ fprintf(stderr, "s: %d e: %d c: %d [", routines->start, routines->end, routines->cap);
+ unsigned int i = routines->start;
+ while (i != routines->end) {
+ fprintf(stderr, "%p,", routines->routines[routines->start]);
+ i = (i + 1)%routines->cap;
+ }
+ fprintf(stderr, "]\n");
+}
+
+static void coroutines_push(struct coroutines *routines, coroutine r) {
+ unsigned int next = routines->end + 1;
+ if (next >= routines->cap) {
+ next = 0;
+ }
+ if (next == routines->start) {
+ unsigned int newcap = routines->cap*2;
+ if (newcap < 10) {
+ newcap = 10;
+ }
+ routines->routines = realloc(routines->routines, newcap*sizeof(coroutine));
+ // Move elements wrapped around the old cap to the newly allocated space.
+ if (routines->end < routines->start) {
+ unsigned int nelems = routines->end;
+ unsigned int max = newcap - routines->cap;
+ // We doubled the space above, so we can assume enough room.
+ assert(nelems <= max);
+ memmove(&routines->routines[routines->cap], &routines->routines[0], nelems*sizeof(coroutine));
+ routines->end += routines->cap;
+ }
+ routines->cap = newcap;
+ next = (routines->end + 1)%routines->cap;
+ }
+ routines->routines[routines->end] = r;
+ routines->end = next;
+}
+
+static bool coroutines_pop(struct coroutines *routines, coroutine *r) {
+ if (routines->start == routines->end) {
+ return 0;
+ }
+ *r = routines->routines[routines->start];
+ routines->start = (routines->start + 1)%routines->cap;
+ return 1;
+}
+
+static void coroutines_free(struct coroutines *routines) {
+ if (routines->routines != NULL) {
+ free(routines->routines);
+ }
+ struct coroutines clr = { 0 }; *routines = clr;
+}
+
+struct dispatch_context *alloc_dispatch_context(void) {
+ struct dispatch_context *c = malloc(sizeof(*c));
+ assert(c != NULL);
+ struct dispatch_context clr = { 0 }; *c = clr;
+ return c;
+}
+
+void free_dispatch_context(struct dispatch_context *c) {
+ if (c->descriptor_set != NULL) {
+ free(c->descriptor_set);
+ c->descriptor_set = NULL;
+ }
+}
+
+struct thread_context *alloc_thread_context(void) {
+ struct thread_context *c = malloc(sizeof(*c));
+ assert(c != NULL);
+ struct thread_context clr = { 0 }; *c = clr;
+ return c;
+}
+
+void free_thread_context(struct thread_context *c) {
+ if (c->memory != NULL) {
+ free(c->memory);
+ }
+ coroutines_free(&c->routines);
+ struct thread_context clr = { 0 }; *c = clr;
+}
+
+struct buffer_descriptor alloc_buffer(size_t size) {
+ void *buf = malloc_align(MIN_STORAGE_BUFFER_OFFSET_ALIGNMENT, size);
+ struct buffer_descriptor desc = {
+ .ptr = buf,
+ .size_in_bytes = size,
+ .robustness_size = size,
+ };
+ return desc;
+}
+
+struct image_descriptor alloc_image_rgba(int width, int height) {
+ size_t size = width*height*4;
+ size = (size + 16 - 1)&~(16 - 1);
+ void *storage = malloc_align(REQUIRED_MEMORY_ALIGNMENT, size);
+ struct image_descriptor desc = { 0 };
+ desc.ptr = storage;
+ desc.width = width;
+ desc.height = height;
+ desc.depth = 1;
+ desc.row_pitch_bytes = width*4;
+ desc.slice_pitch_bytes = size;
+ desc.sample_pitch_bytes = size;
+ desc.sample_count = 1;
+ desc.size_in_bytes = size;
+ return desc;
+}
+
+void prepare_dispatch(struct dispatch_context *ctx, int nthreads, struct program_info *info, uint8_t *desc_set, int ngroupx, int ngroupy, int ngroupz) {
+ if (ctx->desc_set_size < info->desc_set_size) {
+ if (ctx->descriptor_set != NULL) {
+ free(ctx->descriptor_set);
+ }
+ ctx->descriptor_set = malloc_align(16, info->desc_set_size);
+ ctx->desc_set_size = info->desc_set_size;
+ }
+ memcpy(ctx->descriptor_set, desc_set, info->desc_set_size);
+
+ int invocations_per_subgroup = SIMD_WIDTH;
+ int invocations_per_workgroup = info->workgroup_size_x * info->workgroup_size_y * info->workgroup_size_z;
+ int subgroups_per_workgroup = (invocations_per_workgroup + invocations_per_subgroup - 1) / invocations_per_subgroup;
+
+ ctx->has_cbarriers = info->has_cbarriers;
+ ctx->begin = info->begin;
+ ctx->await = info->await;
+ ctx->destroy = info->destroy;
+ ctx->nthreads = nthreads;
+ ctx->memory_size = info->min_memory_size;
+
+ ctx->data.workgroup_size[0] = info->workgroup_size_x;
+ ctx->data.workgroup_size[1] = info->workgroup_size_y;
+ ctx->data.workgroup_size[2] = info->workgroup_size_z;
+ ctx->data.num_workgroups[0] = ngroupx;
+ ctx->data.num_workgroups[1] = ngroupy;
+ ctx->data.num_workgroups[2] = ngroupz;
+ ctx->data.invocations_per_subgroup = invocations_per_subgroup;
+ ctx->data.invocations_per_workgroup = invocations_per_workgroup;
+ ctx->data.subgroups_per_workgroup = subgroups_per_workgroup;
+ ctx->data.descriptor_sets[0] = ctx->descriptor_set;
+}
+
+void dispatch_thread(struct dispatch_context *ctx, int thread_idx, struct thread_context *thread) {
+ if (thread->memory_size < ctx->memory_size) {
+ if (thread->memory != NULL) {
+ free(thread->memory);
+ }
+ // SwiftShader doesn't seem to align shared memory. However, better safe
+ // than subtle errors. Note that the program info generator pads
+ // memory_size to ensure space for alignment.
+ thread->memory = malloc_align(16, ctx->memory_size);
+ thread->memory_size = ctx->memory_size;
+ }
+ uint8_t *memory = thread->memory;
+
+ struct program_data *data = &ctx->data;
+
+ int sx = data->num_workgroups[0];
+ int sy = data->num_workgroups[1];
+ int sz = data->num_workgroups[2];
+ int ngroups = sx * sy * sz;
+
+ for (int i = thread_idx; i < ngroups; i += ctx->nthreads) {
+ int group_id = i;
+ int z = group_id / (sx * sy);
+ group_id -= z * sx * sy;
+ int y = group_id / sx;
+ group_id -= y * sx;
+ int x = group_id;
+ if (ctx->has_cbarriers) {
+ for (int subgroup = 0; subgroup < data->subgroups_per_workgroup; subgroup++) {
+ coroutine r = ctx->begin(data, x, y, z, memory, subgroup, 1);
+ coroutines_push(&thread->routines, r);
+ }
+ } else {
+ coroutine r = ctx->begin(data, x, y, z, memory, 0, data->subgroups_per_workgroup);
+ coroutines_push(&thread->routines, r);
+ }
+ coroutine r;
+ while (coroutines_pop(&thread->routines, &r)) {
+ yield_result res;
+ if (ctx->await(r, &res)) {
+ coroutines_push(&thread->routines, r);
+ } else {
+ ctx->destroy(r);
+ }
+ }
+ }
+}
diff --git a/vendor/gioui.org/cpu/runtime.h b/vendor/gioui.org/cpu/runtime.h
new file mode 100644
index 0000000..cfae912
--- /dev/null
+++ b/vendor/gioui.org/cpu/runtime.h
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+#define ATTR_HIDDEN __attribute__ ((visibility ("hidden")))
+
+// program_info contains constant parameters for a program.
+struct program_info {
+ // MinMemorySize is the minimum size of memory passed to dispatch.
+ size_t min_memory_size;
+ // has_cbarriers is 1 when the program contains control barriers.
+ bool has_cbarriers;
+ // desc_set_size is the size of the first descriptor set for the program.
+ size_t desc_set_size;
+ int workgroup_size_x;
+ int workgroup_size_y;
+ int workgroup_size_z;
+ // Program entrypoints.
+ routine_begin begin;
+ routine_await await;
+ routine_destroy destroy;
+};
+
+// dispatch_context contains the information a program dispatch.
+struct dispatch_context;
+
+// thread_context contains the working memory of a batch. It may be
+// reused, but not concurrently.
+struct thread_context;
+
+extern struct buffer_descriptor alloc_buffer(size_t size) ATTR_HIDDEN;
+extern struct image_descriptor alloc_image_rgba(int width, int height) ATTR_HIDDEN;
+
+extern struct dispatch_context *alloc_dispatch_context(void) ATTR_HIDDEN;
+
+extern void free_dispatch_context(struct dispatch_context *c) ATTR_HIDDEN;
+
+extern struct thread_context *alloc_thread_context(void) ATTR_HIDDEN;
+
+extern void free_thread_context(struct thread_context *c) ATTR_HIDDEN;
+
+// prepare_dispatch initializes ctx to run a dispatch of a program distributed
+// among nthreads threads.
+extern void prepare_dispatch(struct dispatch_context *ctx, int nthreads, struct program_info *info, uint8_t *desc_set, int ngroupx, int ngroupy, int ngroupz) ATTR_HIDDEN;
+
+// dispatch_batch executes a dispatch batch.
+extern void dispatch_thread(struct dispatch_context *ctx, int thread_idx, struct thread_context *thread) ATTR_HIDDEN;