diff options
Diffstat (limited to 'vendor/gioui.org/gpu/compute.go')
-rw-r--r-- | vendor/gioui.org/gpu/compute.go | 2219 |
1 files changed, 2219 insertions, 0 deletions
diff --git a/vendor/gioui.org/gpu/compute.go b/vendor/gioui.org/gpu/compute.go new file mode 100644 index 0000000..625658c --- /dev/null +++ b/vendor/gioui.org/gpu/compute.go @@ -0,0 +1,2219 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +package gpu + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "hash/maphash" + "image" + "image/color" + "image/draw" + "image/png" + "io/ioutil" + "math" + "math/bits" + "runtime" + "sort" + "time" + "unsafe" + + "gioui.org/cpu" + "gioui.org/f32" + "gioui.org/gpu/internal/driver" + "gioui.org/internal/byteslice" + "gioui.org/internal/f32color" + "gioui.org/internal/ops" + "gioui.org/internal/scene" + "gioui.org/layout" + "gioui.org/op" + "gioui.org/shader" + "gioui.org/shader/gio" + "gioui.org/shader/piet" +) + +type compute struct { + ctx driver.Device + + collector collector + enc encoder + texOps []textureOp + viewport image.Point + maxTextureDim int + srgb bool + atlases []*textureAtlas + frameCount uint + moves []atlasMove + + programs struct { + elements computeProgram + tileAlloc computeProgram + pathCoarse computeProgram + backdrop computeProgram + binning computeProgram + coarse computeProgram + kernel4 computeProgram + } + buffers struct { + config sizedBuffer + scene sizedBuffer + state sizedBuffer + memory sizedBuffer + } + output struct { + blitPipeline driver.Pipeline + + buffer sizedBuffer + + uniforms *copyUniforms + uniBuf driver.Buffer + + layerVertices []layerVertex + descriptors *piet.Kernel4DescriptorSetLayout + + nullMaterials driver.Texture + } + // imgAllocs maps imageOpData.handles to allocs. + imgAllocs map[interface{}]*atlasAlloc + // materials contains the pre-processed materials (transformed images for + // now, gradients etc. later) packed in a texture atlas. The atlas is used + // as source in kernel4. + materials struct { + // allocs maps texture ops the their atlases and FillImage offsets. 
+ allocs map[textureKey]materialAlloc + + pipeline driver.Pipeline + buffer sizedBuffer + quads []materialVertex + uniforms struct { + u *materialUniforms + buf driver.Buffer + } + } + timers struct { + profile string + t *timers + compact *timer + render *timer + blit *timer + } + + // CPU fallback fields. + useCPU bool + dispatcher *dispatcher + + // The following fields hold scratch space to avoid garbage. + zeroSlice []byte + memHeader *memoryHeader + conf *config +} + +type materialAlloc struct { + alloc *atlasAlloc + offset image.Point +} + +type layer struct { + rect image.Rectangle + alloc *atlasAlloc + ops []paintOp + materials *textureAtlas +} + +type allocQuery struct { + atlas *textureAtlas + size image.Point + empty bool + format driver.TextureFormat + bindings driver.BufferBinding + nocompact bool +} + +type atlasAlloc struct { + atlas *textureAtlas + rect image.Rectangle + cpu bool + dead bool + frameCount uint +} + +type atlasMove struct { + src *textureAtlas + dstPos image.Point + srcRect image.Rectangle + cpu bool +} + +type textureAtlas struct { + image driver.Texture + format driver.TextureFormat + bindings driver.BufferBinding + hasCPU bool + cpuImage cpu.ImageDescriptor + size image.Point + allocs []*atlasAlloc + packer packer + realized bool + lastFrame uint + compact bool +} + +type copyUniforms struct { + scale [2]float32 + pos [2]float32 + uvScale [2]float32 + _ [8]byte // Pad to 16 bytes. 
+} + +type materialUniforms struct { + scale [2]float32 + pos [2]float32 + emulatesRGB float32 + _ [12]byte // Pad to 16 bytes +} + +type collector struct { + hasher maphash.Hash + profile bool + reader ops.Reader + states []f32.Affine2D + clear bool + clearColor f32color.RGBA + clipStates []clipState + order []hashIndex + transStack []transEntry + prevFrame opsCollector + frame opsCollector +} + +type transEntry struct { + t f32.Affine2D + relTrans f32.Affine2D +} + +type hashIndex struct { + index int + hash uint64 +} + +type opsCollector struct { + paths []byte + clipCmds []clipCmd + ops []paintOp + layers []layer +} + +type paintOp struct { + clipStack []clipCmd + offset image.Point + state paintKey + intersect f32.Rectangle + hash uint64 + layer int + texOpIdx int +} + +// clipCmd describes a clipping command ready to be used for the compute +// pipeline. +type clipCmd struct { + // union of the bounds of the operations that are clipped. + union f32.Rectangle + state clipKey + path []byte + pathKey ops.Key + absBounds f32.Rectangle +} + +type encoderState struct { + relTrans f32.Affine2D + clip *clipState + + paintKey +} + +// clipKey completely describes a clip operation (along with its path) and is appropriate +// for hashing and equality checks. +type clipKey struct { + bounds f32.Rectangle + strokeWidth float32 + relTrans f32.Affine2D + pathHash uint64 +} + +// paintKey completely defines a paint operation. It is suitable for hashing and +// equality checks. +type paintKey struct { + t f32.Affine2D + matType materialType + // Current paint.ImageOp + image imageOpData + // Current paint.ColorOp, if any. + color color.NRGBA + + // Current paint.LinearGradientOp. 
+ stop1 f32.Point + stop2 f32.Point + color1 color.NRGBA + color2 color.NRGBA +} + +type clipState struct { + absBounds f32.Rectangle + parent *clipState + path []byte + pathKey ops.Key + intersect f32.Rectangle + push bool + + clipKey +} + +type layerVertex struct { + posX, posY float32 + u, v float32 +} + +// materialVertex describes a vertex of a quad used to render a transformed +// material. +type materialVertex struct { + posX, posY float32 + u, v float32 +} + +// textureKey identifies textureOp. +type textureKey struct { + handle interface{} + transform f32.Affine2D + bounds image.Rectangle +} + +// textureOp represents an paintOp that requires texture space. +type textureOp struct { + img imageOpData + key textureKey + // offset is the integer offset separated from key.transform to increase cache hit rate. + off image.Point + // matAlloc is the atlas placement for material. + matAlloc materialAlloc + // imgAlloc is the atlas placement for the source image + imgAlloc *atlasAlloc +} + +type encoder struct { + scene []scene.Command + npath int + npathseg int + ntrans int +} + +type encodeState struct { + trans f32.Affine2D + clip f32.Rectangle +} + +// sizedBuffer holds a GPU buffer, or its equivalent CPU memory. +type sizedBuffer struct { + size int + buffer driver.Buffer + // cpuBuf is initialized when useCPU is true. + cpuBuf cpu.BufferDescriptor +} + +// computeProgram holds a compute program, or its equivalent CPU implementation. +type computeProgram struct { + prog driver.Program + + // CPU fields. 
+ progInfo *cpu.ProgramInfo + descriptors unsafe.Pointer + buffers []*cpu.BufferDescriptor +} + +// config matches Config in setup.h +type config struct { + n_elements uint32 // paths + n_pathseg uint32 + width_in_tiles uint32 + height_in_tiles uint32 + tile_alloc memAlloc + bin_alloc memAlloc + ptcl_alloc memAlloc + pathseg_alloc memAlloc + anno_alloc memAlloc + trans_alloc memAlloc +} + +// memAlloc matches Alloc in mem.h +type memAlloc struct { + offset uint32 + //size uint32 +} + +// memoryHeader matches the header of Memory in mem.h. +type memoryHeader struct { + mem_offset uint32 + mem_error uint32 +} + +// rect is a oriented rectangle. +type rectangle [4]f32.Point + +const ( + layersBindings = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture + materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead + // Materials and layers can share texture storage if their bindings match. + combinedBindings = layersBindings | materialsBindings +) + +// GPU structure sizes and constants. +const ( + tileWidthPx = 32 + tileHeightPx = 32 + ptclInitialAlloc = 1024 + kernel4OutputUnit = 2 + kernel4AtlasUnit = 3 + + pathSize = 12 + binSize = 8 + pathsegSize = 52 + annoSize = 32 + transSize = 24 + stateSize = 60 + stateStride = 4 + 2*stateSize +) + +// mem.h constants. +const ( + memNoError = 0 // NO_ERROR + memMallocFailed = 1 // ERR_MALLOC_FAILED +) + +func newCompute(ctx driver.Device) (*compute, error) { + caps := ctx.Caps() + maxDim := caps.MaxTextureSize + // Large atlas textures cause artifacts due to precision loss in + // shaders. + if cap := 8192; maxDim > cap { + maxDim = cap + } + // The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the + // complexity of a rectangular limit for later. 
+ if computeCap := 4096; maxDim > computeCap { + maxDim = computeCap + } + g := &compute{ + ctx: ctx, + maxTextureDim: maxDim, + srgb: caps.Features.Has(driver.FeatureSRGB), + conf: new(config), + memHeader: new(memoryHeader), + } + null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead) + if err != nil { + g.Release() + return nil, err + } + g.output.nullMaterials = null + shaders := []struct { + prog *computeProgram + src shader.Sources + info *cpu.ProgramInfo + }{ + {&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo}, + {&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo}, + {&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo}, + {&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo}, + {&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo}, + {&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo}, + {&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo}, + } + if !caps.Features.Has(driver.FeatureCompute) { + if !cpu.Supported { + return nil, errors.New("gpu: missing support for compute programs") + } + g.useCPU = true + } + if g.useCPU { + g.dispatcher = newDispatcher(runtime.NumCPU()) + } + + copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag) + if err != nil { + g.Release() + return nil, err + } + defer copyVert.Release() + defer copyFrag.Release() + pipe, err := ctx.NewPipeline(driver.PipelineDesc{ + VertexShader: copyVert, + FragmentShader: copyFrag, + VertexLayout: driver.VertexLayout{ + Inputs: []driver.InputDesc{ + {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, + {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + }, + Stride: int(unsafe.Sizeof(g.output.layerVertices[0])), + }, + PixelFormat: driver.TextureFormatOutput, + BlendDesc: driver.BlendDesc{ + Enable: true, + 
SrcFactor: driver.BlendFactorOne, + DstFactor: driver.BlendFactorOneMinusSrcAlpha, + }, + Topology: driver.TopologyTriangles, + }) + if err != nil { + g.Release() + return nil, err + } + g.output.blitPipeline = pipe + g.output.uniforms = new(copyUniforms) + + buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms))) + if err != nil { + g.Release() + return nil, err + } + g.output.uniBuf = buf + + materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag) + if err != nil { + g.Release() + return nil, err + } + defer materialVert.Release() + defer materialFrag.Release() + pipe, err = ctx.NewPipeline(driver.PipelineDesc{ + VertexShader: materialVert, + FragmentShader: materialFrag, + VertexLayout: driver.VertexLayout{ + Inputs: []driver.InputDesc{ + {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, + {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + }, + Stride: int(unsafe.Sizeof(g.materials.quads[0])), + }, + PixelFormat: driver.TextureFormatRGBA8, + Topology: driver.TopologyTriangles, + }) + if err != nil { + g.Release() + return nil, err + } + g.materials.pipeline = pipe + g.materials.uniforms.u = new(materialUniforms) + + buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u))) + if err != nil { + g.Release() + return nil, err + } + g.materials.uniforms.buf = buf + + for _, shader := range shaders { + if !g.useCPU { + p, err := ctx.NewComputeProgram(shader.src) + if err != nil { + g.Release() + return nil, err + } + shader.prog.prog = p + } else { + shader.prog.progInfo = shader.info + } + } + if g.useCPU { + { + desc := new(piet.ElementsDescriptorSetLayout) + g.programs.elements.descriptors = unsafe.Pointer(desc) + g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()} + } + { + desc := new(piet.Tile_allocDescriptorSetLayout) + g.programs.tileAlloc.descriptors = 
unsafe.Pointer(desc) + g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} + } + { + desc := new(piet.Path_coarseDescriptorSetLayout) + g.programs.pathCoarse.descriptors = unsafe.Pointer(desc) + g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} + } + { + desc := new(piet.BackdropDescriptorSetLayout) + g.programs.backdrop.descriptors = unsafe.Pointer(desc) + g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} + } + { + desc := new(piet.BinningDescriptorSetLayout) + g.programs.binning.descriptors = unsafe.Pointer(desc) + g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} + } + { + desc := new(piet.CoarseDescriptorSetLayout) + g.programs.coarse.descriptors = unsafe.Pointer(desc) + g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} + } + { + desc := new(piet.Kernel4DescriptorSetLayout) + g.programs.kernel4.descriptors = unsafe.Pointer(desc) + g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} + g.output.descriptors = desc + } + } + return g, nil +} + +func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { + vert, err = ctx.NewVertexShader(vsrc) + if err != nil { + return + } + frag, err = ctx.NewFragmentShader(fsrc) + if err != nil { + vert.Release() + } + return +} + +func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error { + g.frameCount++ + g.collect(viewport, frameOps) + return g.frame(target) +} + +func (g *compute) collect(viewport image.Point, ops *op.Ops) { + g.viewport = viewport + g.collector.reset() + + g.texOps = g.texOps[:0] + g.collector.collect(ops, viewport, &g.texOps) +} + +func (g *compute) Clear(col color.NRGBA) { + g.collector.clear = true + g.collector.clearColor = f32color.LinearFromSRGB(col) +} + +func (g *compute) 
frame(target RenderTarget) error { + viewport := g.viewport + defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport) + defer g.ctx.EndFrame() + + t := &g.timers + if g.collector.profile && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) { + t.t = newTimers(g.ctx) + t.compact = t.t.newTimer() + t.render = t.t.newTimer() + t.blit = t.t.newTimer() + } + + if err := g.uploadImages(); err != nil { + return err + } + if err := g.renderMaterials(); err != nil { + return err + } + g.layer(viewport, g.texOps) + t.render.begin() + if err := g.renderLayers(viewport); err != nil { + return err + } + t.render.end() + d := driver.LoadDesc{ + ClearColor: g.collector.clearColor, + } + if g.collector.clear { + g.collector.clear = false + d.Action = driver.LoadActionClear + } + t.blit.begin() + g.blitLayers(d, defFBO, viewport) + t.blit.end() + t.compact.begin() + if err := g.compactAllocs(); err != nil { + return err + } + t.compact.end() + if g.collector.profile && t.t.ready() { + com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed + ft := com + ren + blit + q := 100 * time.Microsecond + ft = ft.Round(q) + com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q) + t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit) + } + return nil +} + +func (g *compute) dumpAtlases() { + for i, a := range g.atlases { + dump := image.NewRGBA(image.Rectangle{Max: a.size}) + err := driver.DownloadImage(g.ctx, a.image, dump) + if err != nil { + panic(err) + } + nrgba := image.NewNRGBA(dump.Bounds()) + draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src) + var buf bytes.Buffer + if err := png.Encode(&buf, nrgba); err != nil { + panic(err) + } + if err := ioutil.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil { + panic(err) + } + } +} + +func (g *compute) Profile() string { + return g.timers.profile +} + +func (g *compute) compactAllocs() error { + const ( + maxAllocAge = 3 + maxAtlasAge = 10 + 
) + atlases := g.atlases + for _, a := range atlases { + if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge { + a.compact = true + } + } + for len(atlases) > 0 { + var ( + dstAtlas *textureAtlas + format driver.TextureFormat + bindings driver.BufferBinding + ) + g.moves = g.moves[:0] + addedLayers := false + useCPU := false + fill: + for len(atlases) > 0 { + srcAtlas := atlases[0] + allocs := srcAtlas.allocs + if !srcAtlas.compact { + atlases = atlases[1:] + continue + } + if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) { + break + } + format = srcAtlas.format + bindings = srcAtlas.bindings + for len(srcAtlas.allocs) > 0 { + a := srcAtlas.allocs[0] + n := len(srcAtlas.allocs) + if g.frameCount-a.frameCount > maxAllocAge { + a.dead = true + srcAtlas.allocs[0] = srcAtlas.allocs[n-1] + srcAtlas.allocs = srcAtlas.allocs[:n-1] + continue + } + size := a.rect.Size() + alloc, fits := g.atlasAlloc(allocQuery{ + atlas: dstAtlas, + size: size, + format: format, + bindings: bindings, + nocompact: true, + }) + if !fits { + break fill + } + dstAtlas = alloc.atlas + allocs = append(allocs, a) + addedLayers = true + useCPU = useCPU || a.cpu + dstAtlas.allocs = append(dstAtlas.allocs, a) + pos := alloc.rect.Min + g.moves = append(g.moves, atlasMove{ + src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu, + }) + a.atlas = dstAtlas + a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())} + srcAtlas.allocs[0] = srcAtlas.allocs[n-1] + srcAtlas.allocs = srcAtlas.allocs[:n-1] + } + srcAtlas.compact = false + srcAtlas.realized = false + srcAtlas.packer.clear() + srcAtlas.packer.newPage() + srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) + atlases = atlases[1:] + } + if !addedLayers { + break + } + outputSize := dstAtlas.packer.sizes[0] + if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil { + return err + } + for _, move := range g.moves { + if !move.cpu { + 
g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, move.srcRect) + } else { + src := move.src.cpuImage.Data() + dst := dstAtlas.cpuImage.Data() + sstride := move.src.size.X * 4 + dstride := dstAtlas.size.X * 4 + copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect) + } + } + } + for i := len(g.atlases) - 1; i >= 0; i-- { + a := g.atlases[i] + if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge { + a.Release() + n := len(g.atlases) + g.atlases[i] = g.atlases[n-1] + g.atlases = g.atlases[:n-1] + } + } + return nil +} + +func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) { + sz := srcRect.Size() + soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4 + doff := dstPos.Y*dstStride + dstPos.X*4 + rowLen := sz.X * 4 + for y := 0; y < sz.Y; y++ { + srow := src[soff : soff+rowLen] + drow := dst[doff : doff+rowLen] + copy(drow, srow) + soff += srcStride + doff += dstStride + } +} + +func (g *compute) renderLayers(viewport image.Point) error { + layers := g.collector.frame.layers + for len(layers) > 0 { + var materials, dst *textureAtlas + addedLayers := false + g.enc.reset() + for len(layers) > 0 { + l := &layers[0] + if l.alloc != nil { + layers = layers[1:] + continue + } + if materials != nil { + if l.materials != nil && materials != l.materials { + // Only one materials texture per compute pass. + break + } + } else { + materials = l.materials + } + size := l.rect.Size() + alloc, fits := g.atlasAlloc(allocQuery{ + atlas: dst, + empty: true, + format: driver.TextureFormatRGBA8, + bindings: combinedBindings, + // Pad to avoid overlap. + size: size.Add(image.Pt(1, 1)), + }) + if !fits { + // Only one output atlas per compute pass. 
+ break + } + dst = alloc.atlas + dst.compact = true + addedLayers = true + l.alloc = &alloc + dst.allocs = append(dst.allocs, l.alloc) + encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps) + layers = layers[1:] + } + if !addedLayers { + break + } + outputSize := dst.packer.sizes[0] + tileDims := image.Point{ + X: (outputSize.X + tileWidthPx - 1) / tileWidthPx, + Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx, + } + w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx + if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil { + return err + } + if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil { + return err + } + } + return nil +} + +func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) { + layers := g.collector.frame.layers + g.output.layerVertices = g.output.layerVertices[:0] + for _, l := range layers { + placef := layout.FPt(l.alloc.rect.Min) + sizef := layout.FPt(l.rect.Size()) + r := layout.FRect(l.rect) + quad := [4]layerVertex{ + {posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y}, + {posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y}, + {posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y}, + {posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y}, + } + g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1]) + g.ctx.PrepareTexture(l.alloc.atlas.image) + } + if len(g.output.layerVertices) > 0 { + vertexData := byteslice.Slice(g.output.layerVertices) + g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData)) + g.output.buffer.buffer.Upload(vertexData) + } + g.ctx.BeginRenderPass(fbo, d) + defer g.ctx.EndRenderPass() + if len(layers) == 0 { + return + } + g.ctx.Viewport(0, 0, viewport.X, viewport.Y) + g.ctx.BindPipeline(g.output.blitPipeline) 
+ g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0) + start := 0 + for len(layers) > 0 { + count := 0 + atlas := layers[0].alloc.atlas + for len(layers) > 0 { + l := layers[0] + if l.alloc.atlas != atlas { + break + } + layers = layers[1:] + const verticesPerQuad = 6 + count += verticesPerQuad + } + + // Transform positions to clip space: [-1, -1] - [1, 1], and texture + // coordinates to texture space: [0, 0] - [1, 1]. + clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1)) + sx, _, ox, _, sy, oy := clip.Elems() + g.output.uniforms.scale = [2]float32{sx, sy} + g.output.uniforms.pos = [2]float32{ox, oy} + g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)} + g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms)) + g.ctx.BindUniforms(g.output.uniBuf) + g.ctx.BindTexture(0, atlas.image) + g.ctx.DrawArrays(start, count) + start += count + } +} + +func (g *compute) renderMaterials() error { + m := &g.materials + for k, place := range m.allocs { + if place.alloc.dead { + delete(m.allocs, k) + } + } + texOps := g.texOps + for len(texOps) > 0 { + m.quads = m.quads[:0] + var ( + atlas *textureAtlas + imgAtlas *textureAtlas + ) + // A material is clipped to avoid drawing outside its atlas bounds. + // However, imprecision in the clipping may cause a single pixel + // overflow. + var padding = image.Pt(1, 1) + var allocStart int + for len(texOps) > 0 { + op := &texOps[0] + if a, exists := m.allocs[op.key]; exists { + g.touchAlloc(a.alloc) + op.matAlloc = a + texOps = texOps[1:] + continue + } + + if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas { + // Only one image atlas per render pass. 
+ break + } + imgAtlas = op.imgAlloc.atlas + quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min) + boundsf := quadBounds(quad) + bounds := boundRectF(boundsf) + bounds = bounds.Intersect(op.key.bounds) + + size := bounds.Size() + alloc, fits := g.atlasAlloc(allocQuery{ + atlas: atlas, + size: size.Add(padding), + format: driver.TextureFormatRGBA8, + bindings: combinedBindings, + }) + if !fits { + break + } + if atlas == nil { + allocStart = len(alloc.atlas.allocs) + } + atlas = alloc.atlas + alloc.cpu = g.useCPU + offsetf := layout.FPt(bounds.Min.Mul(-1)) + scale := f32.Pt(float32(size.X), float32(size.Y)) + for i := range quad { + // Position quad to match place. + quad[i].posX += offsetf.X + quad[i].posY += offsetf.Y + // Scale to match viewport [0, 1]. + quad[i].posX /= scale.X + quad[i].posY /= scale.Y + } + // Draw quad as two triangles. + m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2]) + if m.allocs == nil { + m.allocs = make(map[textureKey]materialAlloc) + } + atlasAlloc := materialAlloc{ + alloc: &alloc, + offset: bounds.Min.Mul(-1), + } + atlas.allocs = append(atlas.allocs, atlasAlloc.alloc) + m.allocs[op.key] = atlasAlloc + op.matAlloc = atlasAlloc + texOps = texOps[1:] + } + if len(m.quads) == 0 { + break + } + realized := atlas.realized + if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil { + return err + } + // Transform to clip space: [-1, -1] - [1, 1]. 
+ *m.uniforms.u = materialUniforms{ + scale: [2]float32{2, 2}, + pos: [2]float32{-1, -1}, + } + if !g.srgb { + m.uniforms.u.emulatesRGB = 1.0 + } + m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u)) + vertexData := byteslice.Slice(m.quads) + n := pow2Ceil(len(vertexData)) + m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n) + m.buffer.buffer.Upload(vertexData) + var d driver.LoadDesc + if !realized { + d.Action = driver.LoadActionClear + } + g.ctx.PrepareTexture(imgAtlas.image) + g.ctx.BeginRenderPass(atlas.image, d) + g.ctx.BindTexture(0, imgAtlas.image) + g.ctx.BindPipeline(m.pipeline) + g.ctx.BindUniforms(m.uniforms.buf) + g.ctx.BindVertexBuffer(m.buffer.buffer, 0) + newAllocs := atlas.allocs[allocStart:] + for i, a := range newAllocs { + sz := a.rect.Size().Sub(padding) + g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y) + g.ctx.DrawArrays(i*6, 6) + } + g.ctx.EndRenderPass() + if !g.useCPU { + continue + } + src := atlas.image + data := atlas.cpuImage.Data() + for _, a := range newAllocs { + stride := atlas.size.X * 4 + col := a.rect.Min.X * 4 + row := stride * a.rect.Min.Y + off := col + row + src.ReadPixels(a.rect, data[off:], stride) + } + } + return nil +} + +func (g *compute) uploadImages() error { + for k, a := range g.imgAllocs { + if a.dead { + delete(g.imgAllocs, k) + } + } + type upload struct { + pos image.Point + img *image.RGBA + } + var uploads []upload + format := driver.TextureFormatSRGBA + if !g.srgb { + format = driver.TextureFormatRGBA8 + } + // padding is the number of pixels added to the right and below + // images, to avoid atlas filtering artifacts. 
+ const padding = 1 + texOps := g.texOps + for len(texOps) > 0 { + uploads = uploads[:0] + var atlas *textureAtlas + for len(texOps) > 0 { + op := &texOps[0] + if a, exists := g.imgAllocs[op.img.handle]; exists { + g.touchAlloc(a) + op.imgAlloc = a + texOps = texOps[1:] + continue + } + size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding)) + alloc, fits := g.atlasAlloc(allocQuery{ + atlas: atlas, + size: size, + format: format, + bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer, + }) + if !fits { + break + } + atlas = alloc.atlas + if g.imgAllocs == nil { + g.imgAllocs = make(map[interface{}]*atlasAlloc) + } + op.imgAlloc = &alloc + atlas.allocs = append(atlas.allocs, op.imgAlloc) + g.imgAllocs[op.img.handle] = op.imgAlloc + uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src}) + texOps = texOps[1:] + } + if len(uploads) == 0 { + break + } + if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil { + return err + } + for _, u := range uploads { + size := u.img.Bounds().Size() + driver.UploadImage(atlas.image, u.pos, u.img) + rightPadding := image.Pt(padding, size.Y) + atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0) + bottomPadding := image.Pt(size.X, padding) + atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0) + } + } + return nil +} + +func pow2Ceil(v int) int { + exp := bits.Len(uint(v)) + if bits.OnesCount(uint(v)) == 1 { + exp-- + } + return 1 << exp +} + +// materialQuad constructs a quad that represents the transformed image. It returns the quad +// and its bounds. 
// materialQuad computes the four vertices of an image quad: the image
// rectangle transformed by M in position, with UV coordinates addressing the
// image's location (uvPos) inside a texture atlas of size imgAtlasSize.
func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex {
	imgSize := layout.FPt(img.src.Bounds().Size())
	sx, hx, ox, hy, sy, oy := M.Elems()
	transOff := f32.Pt(ox, oy)
	// The 4 corners of the image rectangle transformed by M, excluding its offset, are:
	//
	// q0: M * (0, 0)        q3: M * (w, 0)
	// q1: M * (0, h)        q2: M * (w, h)
	//
	// Note that q0 = M*0 = 0, q2 = q1 + q3.
	q0 := f32.Pt(0, 0)
	q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y)
	q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X)
	q2 := q1.Add(q3)
	q0 = q0.Add(transOff)
	q1 = q1.Add(transOff)
	q2 = q2.Add(transOff)
	q3 = q3.Add(transOff)

	uvPosf := layout.FPt(uvPos)
	atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y))
	uvBounds := f32.Rectangle{
		Min: uvPosf,
		Max: uvPosf.Add(imgSize),
	}
	// Normalize the pixel UV rectangle into [0, 1] atlas texture coordinates.
	uvBounds.Min.X *= atlasScale.X
	uvBounds.Min.Y *= atlasScale.Y
	uvBounds.Max.X *= atlasScale.X
	uvBounds.Max.Y *= atlasScale.Y
	quad := [4]materialVertex{
		{posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y},
		{posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y},
		{posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y},
		{posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y},
	}
	return quad
}

// quadBounds returns the axis-aligned bounding rectangle of the quad's four
// vertex positions.
func quadBounds(q [4]materialVertex) f32.Rectangle {
	q0 := f32.Pt(q[0].posX, q[0].posY)
	q1 := f32.Pt(q[1].posX, q[1].posY)
	q2 := f32.Pt(q[2].posX, q[2].posY)
	q3 := f32.Pt(q[3].posX, q[3].posY)
	return f32.Rectangle{
		Min: min(min(q0, q1), min(q2, q3)),
		Max: max(max(q0, q1), max(q2, q3)),
	}
}

// max returns the component-wise maximum of p1 and p2.
func max(p1, p2 f32.Point) f32.Point {
	p := p1
	if p2.X > p.X {
		p.X = p2.X
	}
	if p2.Y > p.Y {
		p.Y = p2.Y
	}
	return p
}

// min returns the component-wise minimum of p1 and p2.
func min(p1, p2 f32.Point) f32.Point {
	p := p1
	if p2.X < p.X {
		p.X = p2.X
	}
	if p2.Y < p.Y {
		p.Y = p2.Y
	}
	return p
}

// encodePath appends the scene commands packed in verts to the encoder.
// Each entry is scene.CommandSize+4 bytes; the leading 4 bytes are skipped
// here (presumably per-command metadata — TODO confirm against the ops
// encoder). Gap commands are skipped in strokes and converted to straight
// lines in outlines.
func (enc *encoder) encodePath(verts []byte, fillMode int) {
	for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] {
		cmd := ops.DecodeCommand(verts[4:])
		if cmd.Op() == scene.OpGap {
			if fillMode != scene.FillModeNonzero {
				// Skip gaps in strokes.
				continue
			}
			// Replace them by a straight line in outlines.
			cmd = scene.Line(scene.DecodeGap(cmd))
		}
		enc.scene = append(enc.scene, cmd)
		enc.npathseg++
	}
}

// render runs the full compute pipeline (elements, tile allocation, path
// coarsening, backdrop, binning, coarse rasterization, kernel4 fine
// rasterization) over the encoded scene, writing the result to dst (GPU) or
// cpuDst (CPU fallback). It retries on lost content and grows the shared
// memory buffer when the shaders report an allocation failure.
func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error {
	const (
		// wgSize is the largest and most common workgroup size.
		wgSize = 128
		// PARTITION_SIZE from elements.comp
		partitionSize = 32 * 4
	)
	widthInBins := (tileDims.X + 15) / 16
	heightInBins := (tileDims.Y + 7) / 8
	if widthInBins*heightInBins > wgSize {
		return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx)
	}

	enc := &g.enc
	// Pad scene with zeroes to avoid reading garbage in elements.comp.
	scenePadding := partitionSize - len(enc.scene)%partitionSize
	enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...)

	// Note: this local shadows the scene package for the rest of the function.
	scene := byteslice.Slice(enc.scene)
	if s := len(scene); s > g.buffers.scene.size {
		paddedCap := s * 11 / 10
		if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil {
			return err
		}
	}
	g.buffers.scene.upload(scene)

	// alloc is the number of allocated bytes for static buffers.
	var alloc uint32
	// round rounds v up to the nearest multiple of quantum (a power of two).
	round := func(v, quantum int) int {
		return (v + quantum - 1) &^ (quantum - 1)
	}
	// malloc reserves size bytes (4-byte aligned) from the static region and
	// returns its offset.
	malloc := func(size int) memAlloc {
		size = round(size, 4)
		offset := alloc
		alloc += uint32(size)
		return memAlloc{offset /*, uint32(size)*/}
	}

	*g.conf = config{
		n_elements:      uint32(enc.npath),
		n_pathseg:       uint32(enc.npathseg),
		width_in_tiles:  uint32(tileDims.X),
		height_in_tiles: uint32(tileDims.Y),
		tile_alloc:      malloc(enc.npath * pathSize),
		bin_alloc:       malloc(round(enc.npath, wgSize) * binSize),
		ptcl_alloc:      malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
		pathseg_alloc:   malloc(enc.npathseg * pathsegSize),
		anno_alloc:      malloc(enc.npath * annoSize),
		trans_alloc:     malloc(enc.ntrans * transSize),
	}

	numPartitions := (enc.numElements() + 127) / 128
	// clearSize is the atomic partition counter plus flag and 2 states per partition.
	clearSize := 4 + numPartitions*stateStride
	if clearSize > g.buffers.state.size {
		paddedCap := clearSize * 11 / 10
		if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil {
			return err
		}
	}

	confData := byteslice.Struct(g.conf)
	// NOTE(review): the error from ensureCapacity is ignored here, unlike the
	// scene/state buffers above — confirm whether a failed config allocation
	// is recoverable by the subsequent upload.
	g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData))
	g.buffers.config.upload(confData)

	minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc)
	if minSize > g.buffers.memory.size {
		// Add space for dynamic GPU allocations.
		const sizeBump = 4 * 1024 * 1024
		minSize += sizeBump
		if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil {
			return err
		}
	}

	// Dispatch loop: repeats the whole pipeline after growing the memory
	// buffer if the shaders ran out of space, or if the GPU contents were lost.
	for {
		*g.memHeader = memoryHeader{
			mem_offset: alloc,
		}
		g.buffers.memory.upload(byteslice.Struct(g.memHeader))
		g.buffers.state.upload(g.zeros(clearSize))

		if !g.useCPU {
			g.ctx.BeginCompute()
			g.ctx.BindImageTexture(kernel4OutputUnit, dst)
			img := g.output.nullMaterials
			if images != nil {
				img = images.image
			}
			g.ctx.BindImageTexture(kernel4AtlasUnit, img)
		} else {
			*g.output.descriptors.Binding2() = cpuDst
			if images != nil {
				*g.output.descriptors.Binding3() = images.cpuImage
			}
		}

		g.bindBuffers()
		g.memoryBarrier()
		g.dispatch(g.programs.elements, numPartitions, 1, 1)
		g.memoryBarrier()
		g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1)
		g.memoryBarrier()
		g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1)
		g.memoryBarrier()
		g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1)
		// No barrier needed between backdrop and binning.
		g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1)
		g.memoryBarrier()
		g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1)
		g.memoryBarrier()
		g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1)
		g.memoryBarrier()
		if !g.useCPU {
			g.ctx.EndCompute()
		} else {
			g.dispatcher.Sync()
		}

		// Read back the memory header to learn whether the shaders succeeded.
		if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil {
			if err == driver.ErrContentLost {
				continue
			}
			return err
		}
		switch errCode := g.memHeader.mem_error; errCode {
		case memNoError:
			if g.useCPU {
				w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
				dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride)
			}
			return nil
		case memMallocFailed:
			// Resize memory and try again.
			sz := g.buffers.memory.size * 15 / 10
			if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil {
				return err
			}
			continue
		default:
			return fmt.Errorf("compute: shader program failed with error %d", errCode)
		}
	}
}

// memoryBarrier synchronizes CPU worker threads between pipeline stages; on
// the GPU path the driver handles barriers and this is a no-op.
func (g *compute) memoryBarrier() {
	if g.useCPU {
		g.dispatcher.Barrier()
	}
}

// dispatch runs program p with the given workgroup counts, on GPU or through
// the CPU dispatcher.
func (g *compute) dispatch(p computeProgram, x, y, z int) {
	if !g.useCPU {
		g.ctx.BindProgram(p.prog)
		g.ctx.DispatchCompute(x, y, z)
	} else {
		g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z)
	}
}

// zeros returns a byte slice with size bytes of zeros.
func (g *compute) zeros(size int) []byte {
	if cap(g.zeroSlice) < size {
		g.zeroSlice = append(g.zeroSlice, make([]byte, size)...)
	}
	return g.zeroSlice[:size]
}

// touchAlloc marks a as used this frame, keeping it and its atlas alive
// across compaction. Panics if the allocation was already freed.
func (g *compute) touchAlloc(a *atlasAlloc) {
	if a.dead {
		panic("re-use of dead allocation")
	}
	a.frameCount = g.frameCount
	a.atlas.lastFrame = a.frameCount
}

// atlasAlloc reserves space for q.size in an atlas: the requested atlas if
// given, otherwise a compatible existing atlas, otherwise a freshly created
// one. Atlases that fail to fit are flagged for compaction. The boolean
// result reports whether the requested atlas had room.
func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) {
	var (
		place placement
		fits  bool
		atlas = q.atlas
	)
	if atlas != nil {
		place, fits = atlas.packer.tryAdd(q.size)
		if !fits {
			atlas.compact = true
		}
	}
	if atlas == nil {
		// Look for matching atlas to re-use.
		for _, a := range g.atlases {
			if q.empty && len(a.allocs) > 0 {
				continue
			}
			if q.nocompact && a.compact {
				continue
			}
			// Format must match and the atlas must support all requested bindings.
			if a.format != q.format || a.bindings&q.bindings != q.bindings {
				continue
			}
			place, fits = a.packer.tryAdd(q.size)
			if !fits {
				a.compact = true
				continue
			}
			atlas = a
			break
		}
	}
	if atlas == nil {
		// No existing atlas fits; create a new one sized up to the texture limit.
		atlas = &textureAtlas{
			format:   q.format,
			bindings: q.bindings,
		}
		atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim)
		atlas.packer.newPage()
		g.atlases = append(g.atlases, atlas)
		place, fits = atlas.packer.tryAdd(q.size)
		if !fits {
			panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size))
		}
	}
	if !fits {
		return atlasAlloc{}, false
	}
	atlas.lastFrame = g.frameCount
	return atlasAlloc{
		frameCount: g.frameCount,
		atlas:      atlas,
		rect:       image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)},
	}, true
}

// realizeAtlas grows atlas to at least size and materializes its backing
// texture (and CPU image if useCPU). After realization the atlas is frozen:
// its packer may not grow it further and resizing panics.
func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error {
	defer func() {
		atlas.packer.maxDims = atlas.size
		atlas.realized = true
		atlas.ensureCPUImage(useCPU)
	}()
	if atlas.size.X >= size.X && atlas.size.Y >= size.Y {
		return nil
	}
	if atlas.realized {
		panic("resizing a realized atlas")
	}
	if err := atlas.resize(g.ctx, size); err != nil {
		return err
	}
	return nil
}

// resize releases the atlas's current resources and allocates a fresh
// texture of the given size. The previous contents are not preserved.
func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error {
	a.Release()

	img, err := ctx.NewTexture(a.format, size.X, size.Y,
		driver.FilterNearest,
		driver.FilterNearest,
		a.bindings)
	if err != nil {
		return err
	}
	a.image = img
	a.size = size
	return nil
}

// ensureCPUImage lazily allocates the CPU-side image used by the software
// rasterizer fallback.
func (a *textureAtlas) ensureCPUImage(useCPU bool) {
	if !useCPU || a.hasCPU {
		return
	}
	a.hasCPU = true
	a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y)
}

// Release frees all GPU and CPU resources owned by the renderer and resets
// it to the zero value.
func (g *compute) Release() {
	if g.useCPU {
		g.dispatcher.Stop()
	}
	type resource interface {
		Release()
	}
	// NOTE(review): the interface-valued entries (nullMaterials, blitPipeline,
	// uniBuf, …) are skipped only when they are nil interface values; a typed
	// nil stored in one of them would pass the nil check — confirm initializers
	// never store typed nils here.
	res := []resource{
		g.output.nullMaterials,
		&g.programs.elements,
		&g.programs.tileAlloc,
		&g.programs.pathCoarse,
		&g.programs.backdrop,
		&g.programs.binning,
		&g.programs.coarse,
		&g.programs.kernel4,
		g.output.blitPipeline,
		&g.output.buffer,
		g.output.uniBuf,
		&g.buffers.scene,
		&g.buffers.state,
		&g.buffers.memory,
		&g.buffers.config,
		g.materials.pipeline,
		&g.materials.buffer,
		g.materials.uniforms.buf,
		g.timers.t,
	}
	for _, r := range res {
		if r != nil {
			r.Release()
		}
	}
	for _, a := range g.atlases {
		a.Release()
	}
	g.ctx.Release()
	*g = compute{}
}

// Release frees the atlas's texture and CPU image, leaving the struct
// reusable (resize allocates anew).
func (a *textureAtlas) Release() {
	if a.image != nil {
		a.image.Release()
		a.image = nil
	}
	a.cpuImage.Free()
	a.hasCPU = false
}

// bindBuffers binds the shared storage buffers to every pipeline stage. The
// buffer order must match each shader's binding layout.
func (g *compute) bindBuffers() {
	g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state)
	g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config)
	g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config)
	g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config)
	g.bindStorageBuffers(g.programs.binning, g.buffers.memory, g.buffers.config)
	g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config)
	g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config)
}

// Release frees the compiled program, if any, and zeroes the struct.
func (p *computeProgram) Release() {
	if p.prog != nil {
		p.prog.Release()
	}
	*p = computeProgram{}
}

// Release frees both the GPU buffer and the CPU buffer and zeroes the struct.
func (b *sizedBuffer) Release() {
	if b.buffer != nil {
		b.buffer.Release()
	}
	b.cpuBuf.Free()
	*b = sizedBuffer{}
}

// ensureCapacity grows the buffer to at least size bytes, allocating either
// a GPU buffer or a CPU buffer depending on useCPU. Existing contents are
// discarded on growth.
func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error {
	if b.size >= size {
		return nil
	}
	if b.buffer != nil {
		b.Release()
	}
	// Free any CPU buffer even when no GPU buffer was present.
	b.cpuBuf.Free()
	if !useCPU {
		buf, err := ctx.NewBuffer(binding, size)
		if err != nil {
			return err
		}
		b.buffer = buf
	} else {
		b.cpuBuf = cpu.NewBuffer(size)
	}
	b.size = size
	return nil
}

// download copies the buffer contents into data, reading back from the GPU
// when a GPU buffer exists.
func (b *sizedBuffer) download(data []byte) error {
	if b.buffer != nil {
		return b.buffer.Download(data)
	} else {
		copy(data, b.cpuBuf.Data())
		return nil
	}
}

// upload copies data into the buffer (GPU upload or CPU memcpy).
func (b *sizedBuffer) upload(data []byte) {
	if b.buffer != nil {
		b.buffer.Upload(data)
	} else {
		copy(b.cpuBuf.Data(), data)
	}
}

// bindStorageBuffers attaches buffers, in order, to prog's storage slots:
// driver bindings on the GPU path, descriptor pointers on the CPU path.
func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) {
	for i, buf := range buffers {
		if !g.useCPU {
			g.ctx.BindStorageBuffer(i, buf.buffer)
		} else {
			*prog.buffers[i] = buf.cpuBuf
		}
	}
}

// bo is the byte order used for decoding encoded operations.
var bo = binary.LittleEndian

// reset clears the encoder for a new frame while keeping allocated capacity.
func (e *encoder) reset() {
	e.scene = e.scene[:0]
	e.npath = 0
	e.npathseg = 0
	e.ntrans = 0
}

// numElements returns the number of encoded scene commands.
func (e *encoder) numElements() int {
	return len(e.scene)
}

// append concatenates e2's commands and counters onto e.
func (e *encoder) append(e2 encoder) {
	e.scene = append(e.scene, e2.scene...)
	e.npath += e2.npath
	e.npathseg += e2.npathseg
	e.ntrans += e2.ntrans
}

// transform encodes a transform command.
func (e *encoder) transform(m f32.Affine2D) {
	e.scene = append(e.scene, scene.Transform(m))
	e.ntrans++
}

// lineWidth encodes a stroke line-width command.
func (e *encoder) lineWidth(width float32) {
	e.scene = append(e.scene, scene.SetLineWidth(width))
}

// fillMode encodes a fill-mode command.
func (e *encoder) fillMode(mode scene.FillMode) {
	e.scene = append(e.scene, scene.SetFillMode(mode))
}

// beginClip encodes the start of a clip with the given absolute bounds.
func (e *encoder) beginClip(bbox f32.Rectangle) {
	e.scene = append(e.scene, scene.BeginClip(bbox))
	e.npath++
}

// endClip encodes the end of a clip with the given absolute bounds.
func (e *encoder) endClip(bbox f32.Rectangle) {
	e.scene = append(e.scene, scene.EndClip(bbox))
	e.npath++
}

// rect encodes r as four line segments.
func (e *encoder) rect(r f32.Rectangle) {
	// Rectangle corners, clock-wise.
	c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y)
	e.line(c0, c1)
	e.line(c1, c2)
	e.line(c2, c3)
	e.line(c3, c0)
}

// fillColor encodes a solid color fill.
func (e *encoder) fillColor(col color.RGBA) {
	e.scene = append(e.scene, scene.FillColor(col))
	e.npath++
}

// fillImage encodes an image fill sourced from the materials atlas at offset.
func (e *encoder) fillImage(index int, offset image.Point) {
	e.scene = append(e.scene, scene.FillImage(index, offset))
	e.npath++
}

// line encodes a straight path segment.
func (e *encoder) line(start, end f32.Point) {
	e.scene = append(e.scene, scene.Line(start, end))
	e.npathseg++
}

// quad encodes a quadratic Bézier path segment.
func (e *encoder) quad(start, ctrl, end f32.Point) {
	e.scene = append(e.scene, scene.Quad(start, ctrl, end))
	e.npathseg++
}

// reset prepares the collector for a new frame, swapping the current frame
// into prevFrame so the layer matcher can reuse last frame's results.
func (c *collector) reset() {
	c.prevFrame, c.frame = c.frame, c.prevFrame
	c.profile = false
	c.clipStates = c.clipStates[:0]
	c.transStack = c.transStack[:0]
	c.frame.reset()
}

// reset clears per-frame op storage while keeping capacity.
func (c *opsCollector) reset() {
	c.paths = c.paths[:0]
	c.clipCmds = c.clipCmds[:0]
	c.ops = c.ops[:0]
	c.layers = c.layers[:0]
}

// addClip pushes a clip region onto state's clip stack. Redundant rectangular
// clips (those that already contain an ancestor's bounds) are discarded.
// NOTE(review): the viewport parameter is not used in this body — confirm
// whether it is vestigial.
func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) {
	// Rectangle clip regions.
	if len(path) == 0 && !push {
		// If the rectangular clip region contains a previous path it can be discarded.
		p := state.clip
		t := state.relTrans.Invert()
		for p != nil {
			// rect is the parent bounds transformed relative to the rectangle.
			rect := transformBounds(t, p.bounds)
			if rect.In(bounds) {
				return
			}
			t = p.relTrans.Invert().Mul(t)
			p = p.parent
		}
	}

	absBounds := transformBounds(state.t, bounds).Bounds()
	intersect := absBounds
	if state.clip != nil {
		intersect = state.clip.intersect.Intersect(intersect)
	}
	c.clipStates = append(c.clipStates, clipState{
		parent:    state.clip,
		absBounds: absBounds,
		path:      path,
		pathKey:   key,
		intersect: intersect,
		clipKey: clipKey{
			bounds:      bounds,
			relTrans:    state.relTrans,
			strokeWidth: strokeWidth,
			pathHash:    hash,
		},
	})
	state.clip = &c.clipStates[len(c.clipStates)-1]
	state.relTrans = f32.Affine2D{}
}

// collect walks the op list rooted at root, building the frame's flattened
// paint ops (with their clip stacks) and accumulating texture ops into
// texOps. It also performs occlusion optimizations: a full-screen opaque
// color discards earlier commands, and rectangular clips subsumed by others
// are culled.
func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) {
	fview := f32.Rectangle{Max: layout.FPt(viewport)}
	var intOps *ops.Ops
	if root != nil {
		intOps = &root.Internal
	}
	c.reader.Reset(intOps)
	var state encoderState
	reset := func() {
		state = encoderState{
			paintKey: paintKey{
				color: color.NRGBA{A: 0xff},
			},
		}
	}
	reset()
	r := &c.reader
	var (
		// pathData holds the most recent path op, consumed by the next clip op.
		pathData struct {
			data []byte
			key  ops.Key
			hash uint64
		}
		strWidth float32
	)
	// Root clip covering the whole viewport.
	c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false)
	for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() {
		switch ops.OpType(encOp.Data[0]) {
		case ops.TypeProfile:
			c.profile = true
		case ops.TypeTransform:
			dop, push := ops.DecodeTransform(encOp.Data)
			if push {
				c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans})
			}
			state.t = state.t.Mul(dop)
			state.relTrans = state.relTrans.Mul(dop)
		case ops.TypePopTransform:
			n := len(c.transStack)
			st := c.transStack[n-1]
			c.transStack = c.transStack[:n-1]
			state.t = st.t
			state.relTrans = st.relTrans
		case ops.TypeStroke:
			strWidth = decodeStrokeOp(encOp.Data)
		case ops.TypePath:
			hash := bo.Uint64(encOp.Data[1:])
			encOp, ok = r.Decode()
			if !ok {
				panic("unexpected end of path operation")
			}
			pathData.data = encOp.Data[ops.TypeAuxLen:]
			pathData.key = encOp.Key
			pathData.hash = hash
		case ops.TypeClip:
			var op ops.ClipOp
			op.Decode(encOp.Data)
			bounds := layout.FRect(op.Bounds)
			c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true)
			pathData.data = nil
			strWidth = 0
		case ops.TypePopClip:
			state.relTrans = state.clip.relTrans.Mul(state.relTrans)
			state.clip = state.clip.parent
		case ops.TypeColor:
			state.matType = materialColor
			state.color = decodeColorOp(encOp.Data)
		case ops.TypeLinearGradient:
			state.matType = materialLinearGradient
			op := decodeLinearGradientOp(encOp.Data)
			state.stop1 = op.stop1
			state.stop2 = op.stop2
			state.color1 = op.color1
			state.color2 = op.color2
		case ops.TypeImage:
			state.matType = materialTexture
			state.image = decodeImageOp(encOp.Data, encOp.Refs)
		case ops.TypePaint:
			paintState := state
			if paintState.matType == materialTexture {
				// Clip to the bounds of the image, to hide other images in the atlas.
				sz := state.image.src.Rect.Size()
				bounds := f32.Rectangle{Max: layout.FPt(sz)}
				c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false)
			}
			intersect := paintState.clip.intersect
			if intersect.Empty() {
				break
			}

			// If the paint is a uniform opaque color that takes up the whole
			// screen, it covers all previous paints and we can discard all
			// rendering commands recorded so far.
			if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 {
				c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque()
				c.clear = true
				c.frame.reset()
				break
			}

			// Flatten clip stack.
			p := paintState.clip
			startIdx := len(c.frame.clipCmds)
			for p != nil {
				idx := len(c.frame.paths)
				c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...)
				path := c.frame.paths[idx:]
				copy(path, p.path)
				c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{
					state:     p.clipKey,
					path:      path,
					pathKey:   p.pathKey,
					absBounds: p.absBounds,
				})
				p = p.parent
			}
			clipStack := c.frame.clipCmds[startIdx:]
			c.frame.ops = append(c.frame.ops, paintOp{
				clipStack: clipStack,
				state:     paintState.paintKey,
				intersect: intersect,
			})
		case ops.TypeSave:
			id := ops.DecodeSave(encOp.Data)
			c.save(id, state.t)
		case ops.TypeLoad:
			reset()
			id := ops.DecodeLoad(encOp.Data)
			state.t = c.states[id]
			state.relTrans = state.t
		}
	}
	for i := range c.frame.ops {
		op := &c.frame.ops[i]
		// For each clip, cull rectangular clip regions that contain its
		// (transformed) bounds. addClip already handled the converse case.
		// TODO: do better than O(n²) to efficiently deal with deep stacks.
		for j := 0; j < len(op.clipStack)-1; j++ {
			cl := op.clipStack[j]
			p := cl.state
			r := transformBounds(p.relTrans, p.bounds)
			for k := j + 1; k < len(op.clipStack); k++ {
				cl2 := op.clipStack[k]
				p2 := cl2.state
				if len(cl2.path) == 0 && r.In(cl2.state.bounds) {
					op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...)
					k--
					op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans)
				}
				r = transformRect(p2.relTrans, r)
			}
		}
		// Separate the integer offset from the first transform. Two ops that differ
		// only in integer offsets may share backing storage.
		if len(op.clipStack) > 0 {
			// Note: this local c shadows the collector receiver.
			c := &op.clipStack[len(op.clipStack)-1]
			t := c.state.relTrans
			t, off := separateTransform(t)
			c.state.relTrans = t
			op.offset = off
			op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1)))
		}
		op.hash = c.hashOp(*op)
		op.texOpIdx = -1
		switch op.state.matType {
		case materialTexture:
			op.texOpIdx = len(*texOps)
			// Separate integer offset from transformation. TextureOps that have identical transforms
			// except for their integer offsets can share a transformed image.
			t := op.state.t.Offset(layout.FPt(op.offset))
			t, off := separateTransform(t)
			bounds := boundRectF(op.intersect).Sub(off)
			*texOps = append(*texOps, textureOp{
				img: op.state.image,
				off: off,
				key: textureKey{
					bounds:    bounds,
					transform: t,
					handle:    op.state.image.handle,
				},
			})
		}
	}
}

// hashOp hashes a paint op's clip keys and paint key by feeding their raw
// struct bytes to the collector's hasher. The keys are plain value structs,
// so their bytes identify the op for layer matching.
func (c *collector) hashOp(op paintOp) uint64 {
	c.hasher.Reset()
	for _, cl := range op.clipStack {
		k := cl.state
		keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k)))
		c.hasher.Write(keyBytes[:])
	}
	k := op.state
	keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k)))
	c.hasher.Write(keyBytes[:])
	return c.hasher.Sum64()
}

// layer groups the frame's paint ops into layers, reusing layers from the
// previous frame when op sequences match (by hash and structural equality).
func (g *compute) layer(viewport image.Point, texOps []textureOp) {
	// Sort ops from previous frames by hash.
	c := &g.collector
	prevOps := c.prevFrame.ops
	c.order = c.order[:0]
	for i, op := range prevOps {
		c.order = append(c.order, hashIndex{
			index: i,
			hash:  op.hash,
		})
	}
	sort.Slice(c.order, func(i, j int) bool {
		return c.order[i].hash < c.order[j].hash
	})
	// Split layers with different materials atlas; the compute stage has only
	// one materials slot.
	// splitLayer appends ops as one or more layers, breaking whenever a
	// second materials atlas would be needed. prevLayerIdx, if not -1, names
	// a previous-frame layer whose allocation may be reused.
	splitLayer := func(ops []paintOp, prevLayerIdx int) {
		for len(ops) > 0 {
			var materials *textureAtlas
			idx := 0
			for idx < len(ops) {
				if i := ops[idx].texOpIdx; i != -1 {
					omats := texOps[i].matAlloc.alloc.atlas
					if materials != nil && omats != nil && omats != materials {
						break
					}
					materials = omats
				}
				idx++
			}
			l := layer{ops: ops[:idx], materials: materials}
			if prevLayerIdx != -1 {
				prev := c.prevFrame.layers[prevLayerIdx]
				if !prev.alloc.dead && len(prev.ops) == len(l.ops) {
					l.alloc = prev.alloc
					l.materials = prev.materials
					g.touchAlloc(l.alloc)
				}
			}
			for i, op := range l.ops {
				l.rect = l.rect.Union(boundRectF(op.intersect))
				l.ops[i].layer = len(c.frame.layers)
			}
			c.frame.layers = append(c.frame.layers, l)
			ops = ops[idx:]
		}
	}
	ops := c.frame.ops
	idx := 0
	for idx < len(ops) {
		op := ops[idx]
		// Search for longest matching op sequence.
		// start is the earliest index of a match.
		start := searchOp(c.order, op.hash)
		layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:])
		if len(layerOps) == 0 {
			idx++
			continue
		}
		if unmatched := ops[:idx]; len(unmatched) > 0 {
			// Flush layer of unmatched ops.
			splitLayer(unmatched, -1)
			ops = ops[idx:]
			idx = 0
		}
		splitLayer(layerOps, prevLayerIdx)
		ops = ops[len(layerOps):]
	}
	if len(ops) > 0 {
		splitLayer(ops, -1)
	}
}

// longestLayer finds, among previous-frame ops with matching hashes (order is
// sorted by hash), the longest run matching the head of ops that stays within
// a single previous layer. It returns the matching prefix of ops and the
// previous layer index, or an empty slice and -1 when nothing matches.
func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) {
	longest := 0
	longestIdx := -1
outer:
	for len(order) > 0 {
		first := order[0]
		order = order[1:]
		match := prev[first.index:]
		// Potential match found. Now find longest matching sequence.
		end := 0
		layer := match[0].layer
		off := match[0].offset.Sub(ops[0].offset)
		for end < len(match) && end < len(ops) {
			m := match[end]
			o := ops[end]
			// End layers on previous match.
			if m.layer != layer {
				break
			}
			// End layer when the next op doesn't match.
			if m.hash != o.hash {
				if end == 0 {
					// Hashes are sorted so if the first op doesn't match, no
					// more matches are possible.
					break outer
				}
				break
			}
			if !opEqual(off, m, o) {
				break
			}
			end++
		}
		if end > longest {
			longest = end
			longestIdx = layer

		}
	}
	return ops[:longest], longestIdx
}

// searchOp returns the lower-bound index of hash in order (binary search over
// the hash-sorted slice).
func searchOp(order []hashIndex, hash uint64) int {
	lo, hi := 0, len(order)
	for lo < hi {
		mid := (lo + hi) / 2
		if order[mid].hash < hash {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	return lo
}

// opEqual reports whether o1 and o2 are structurally identical up to the
// constant integer offset off, comparing paint state, clip keys and — when
// path keys differ — path bytes.
func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool {
	if len(o1.clipStack) != len(o2.clipStack) {
		return false
	}
	if o1.state != o2.state {
		return false
	}
	if o1.offset.Sub(o2.offset) != off {
		return false
	}
	for i, cl1 := range o1.clipStack {
		cl2 := o2.clipStack[i]
		if len(cl1.path) != len(cl2.path) {
			return false
		}
		if cl1.state != cl2.state {
			return false
		}
		if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) {
			return false
		}
	}
	return true
}

// encodeLayer encodes all of layer l's ops, translated so the layer's
// bounding rect lands at pos, bracketing them with offsetting transforms.
func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) {
	off := pos.Sub(l.rect.Min)
	offf := layout.FPt(off)

	enc.transform(f32.Affine2D{}.Offset(offf))
	for _, op := range l.ops {
		encodeOp(viewport, off, enc, texOps, op)
	}
	enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1)))
}

// encodeOp encodes one paint op: its clip stack (outermost last) followed by
// the material fill, undoing its accumulated transform afterwards.
func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) {
	// Fill in clip bounds, which the shaders expect to be the union
	// of all affected bounds.
	var union f32.Rectangle
	for i, cl := range op.clipStack {
		union = union.Union(cl.absBounds)
		op.clipStack[i].union = union
	}

	absOfff := layout.FPt(absOff)
	fillMode := scene.FillModeNonzero
	opOff := layout.FPt(op.offset)
	// inv accumulates every transform encoded below so it can be undone in
	// one step after the fill.
	inv := f32.Affine2D{}.Offset(opOff)
	enc.transform(inv)
	for i := len(op.clipStack) - 1; i >= 0; i-- {
		cl := op.clipStack[i]
		if w := cl.state.strokeWidth; w > 0 {
			enc.fillMode(scene.FillModeStroke)
			enc.lineWidth(w)
			fillMode = scene.FillModeStroke
		} else if fillMode != scene.FillModeNonzero {
			enc.fillMode(scene.FillModeNonzero)
			fillMode = scene.FillModeNonzero
		}
		enc.transform(cl.state.relTrans)
		inv = inv.Mul(cl.state.relTrans)
		if len(cl.path) == 0 {
			enc.rect(cl.state.bounds)
		} else {
			enc.encodePath(cl.path, fillMode)
		}
		if i != 0 {
			enc.beginClip(cl.union.Add(absOfff))
		}
	}
	if len(op.clipStack) == 0 {
		// No clipping; fill the entire view.
		enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
	}

	switch op.state.matType {
	case materialTexture:
		texOp := texOps[op.texOpIdx]
		off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff)
		enc.fillImage(0, off)
	case materialColor:
		enc.fillColor(f32color.NRGBAToRGBA(op.state.color))
	case materialLinearGradient:
		// TODO: implement.
		enc.fillColor(f32color.NRGBAToRGBA(op.state.color1))
	default:
		panic("not implemented")
	}
	enc.transform(inv.Invert())
	// Pop the clip stack, except the first entry used for fill.
	for i := 1; i < len(op.clipStack); i++ {
		cl := op.clipStack[i]
		enc.endClip(cl.union.Add(absOfff))
	}
	if fillMode != scene.FillModeNonzero {
		enc.fillMode(scene.FillModeNonzero)
	}
}

// save records the transform for slot id, growing the state table on demand.
func (c *collector) save(id int, state f32.Affine2D) {
	if extra := id - len(c.states) + 1; extra > 0 {
		c.states = append(c.states, make([]f32.Affine2D, extra)...)
	}
	c.states[id] = state
}

// transformBounds returns the four corners of bounds transformed by t, as a
// (possibly non-axis-aligned) quadrilateral.
func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle {
	return rectangle{
		t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)),
		t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)),
	}
}

// separateTransform splits t into a transform with fractional offset and the
// integer part of its offset.
func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) {
	sx, hx, ox, hy, sy, oy := t.Elems()
	intx, fracx := math.Modf(float64(ox))
	inty, fracy := math.Modf(float64(oy))
	t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy))
	return t, image.Pt(int(intx), int(inty))
}

// transformRect transforms each corner of r by t.
func transformRect(t f32.Affine2D, r rectangle) rectangle {
	var tr rectangle
	for i, c := range r {
		tr[i] = t.Transform(c)
	}
	return tr
}

// In reports whether every corner of r lies inside the axis-aligned
// rectangle b.
func (r rectangle) In(b f32.Rectangle) bool {
	for _, c := range r {
		inside := b.Min.X <= c.X && c.X <= b.Max.X &&
			b.Min.Y <= c.Y && c.Y <= b.Max.Y
		if !inside {
			return false
		}
	}
	return true
}

// Contains reports whether r contains b.
// NOTE(review): this always returns true and never inspects r or b — it
// reads as a stub/TODO. No caller is visible in this chunk; confirm whether
// callers rely on this always-true (over-approximating) answer before
// implementing a real containment test.
func (r rectangle) Contains(b f32.Rectangle) bool {
	return true
}

// Bounds returns the axis-aligned bounding rectangle of r's four corners.
func (r rectangle) Bounds() f32.Rectangle {
	bounds := f32.Rectangle{
		Min: f32.Pt(math.MaxFloat32, math.MaxFloat32),
		Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32),
	}
	for _, c := range r {
		if c.X < bounds.Min.X {
			bounds.Min.X = c.X
		}
		if c.Y < bounds.Min.Y {
			bounds.Min.Y = c.Y
		}
		if c.X > bounds.Max.X {
			bounds.Max.X = c.X
		}
		if c.Y > bounds.Max.Y {
			bounds.Max.Y = c.Y
		}
	}
	return bounds
}