// SPDX-License-Identifier: Unlicense OR MIT

package gpu

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"hash/maphash"
	"image"
	"image/color"
	"image/draw"
	"image/png"
	"io/ioutil"
	"math"
	"math/bits"
	"runtime"
	"sort"
	"time"
	"unsafe"

	"gioui.org/cpu"
	"gioui.org/f32"
	"gioui.org/gpu/internal/driver"
	"gioui.org/internal/byteslice"
	"gioui.org/internal/f32color"
	"gioui.org/internal/ops"
	"gioui.org/internal/scene"
	"gioui.org/layout"
	"gioui.org/op"
	"gioui.org/shader"
	"gioui.org/shader/gio"
	"gioui.org/shader/piet"
)

type compute struct {
	ctx driver.Device

	collector     collector
	enc           encoder
	texOps        []textureOp
	viewport      image.Point
	maxTextureDim int
	srgb          bool
	atlases       []*textureAtlas
	frameCount    uint
	moves         []atlasMove

	programs struct {
		elements   computeProgram
		tileAlloc  computeProgram
		pathCoarse computeProgram
		backdrop   computeProgram
		binning    computeProgram
		coarse     computeProgram
		kernel4    computeProgram
	}
	buffers struct {
		config sizedBuffer
		scene  sizedBuffer
		state  sizedBuffer
		memory sizedBuffer
	}

	output struct {
		blitPipeline driver.Pipeline

		buffer sizedBuffer

		uniforms *copyUniforms
		uniBuf   driver.Buffer

		layerVertices []layerVertex
		descriptors   *piet.Kernel4DescriptorSetLayout

		nullMaterials driver.Texture
	}
	// imgAllocs maps imageOpData.handles to allocs.
	imgAllocs map[interface{}]*atlasAlloc
	// materials contains the pre-processed materials (transformed images for
	// now, gradients etc. later) packed in a texture atlas. The atlas is used
	// as source in kernel4.
	materials struct {
		// allocs maps texture ops to their atlases and FillImage offsets.
		allocs map[textureKey]materialAlloc

		pipeline driver.Pipeline
		buffer   sizedBuffer
		quads    []materialVertex

		uniforms struct {
			u   *materialUniforms
			buf driver.Buffer
		}
	}
	timers struct {
		profile string
		t       *timers
		compact *timer
		render  *timer
		blit    *timer
	}

	// CPU fallback fields.
	useCPU     bool
	dispatcher *dispatcher

	// The following fields hold scratch space to avoid garbage.
	zeroSlice []byte
	memHeader *memoryHeader
	conf      *config
}

type materialAlloc struct {
	alloc  *atlasAlloc
	offset image.Point
}

type layer struct {
	rect      image.Rectangle
	alloc     *atlasAlloc
	ops       []paintOp
	materials *textureAtlas
}

type allocQuery struct {
	atlas     *textureAtlas
	size      image.Point
	empty     bool
	format    driver.TextureFormat
	bindings  driver.BufferBinding
	nocompact bool
}

type atlasAlloc struct {
	atlas      *textureAtlas
	rect       image.Rectangle
	cpu        bool
	dead       bool
	frameCount uint
}

type atlasMove struct {
	src     *textureAtlas
	dstPos  image.Point
	srcRect image.Rectangle
	cpu     bool
}

type textureAtlas struct {
	image     driver.Texture
	format    driver.TextureFormat
	bindings  driver.BufferBinding
	hasCPU    bool
	cpuImage  cpu.ImageDescriptor
	size      image.Point
	allocs    []*atlasAlloc
	packer    packer
	realized  bool
	lastFrame uint
	compact   bool
}

type copyUniforms struct {
	scale   [2]float32
	pos     [2]float32
	uvScale [2]float32
	_       [8]byte // Pad to 16 bytes.
}

type materialUniforms struct {
	scale       [2]float32
	pos         [2]float32
	emulatesRGB float32
	_           [12]byte // Pad to 16 bytes
}

type collector struct {
	hasher     maphash.Hash
	profile    bool
	reader     ops.Reader
	states     []f32.Affine2D
	clear      bool
	clearColor f32color.RGBA
	clipStates []clipState
	order      []hashIndex
	transStack []transEntry
	prevFrame  opsCollector
	frame      opsCollector
}

type transEntry struct {
	t        f32.Affine2D
	relTrans f32.Affine2D
}

type hashIndex struct {
	index int
	hash  uint64
}

type opsCollector struct {
	paths    []byte
	clipCmds []clipCmd
	ops      []paintOp
	layers   []layer
}

type paintOp struct {
	clipStack []clipCmd
	offset    image.Point
	state     paintKey
	intersect f32.Rectangle
	hash      uint64
	layer     int
	texOpIdx  int
}

// clipCmd describes a clipping command ready to be used for the compute
// pipeline.
type clipCmd struct {
	// union of the bounds of the operations that are clipped.
	union     f32.Rectangle
	state     clipKey
	path      []byte
	pathKey   ops.Key
	absBounds f32.Rectangle
}

type encoderState struct {
	relTrans f32.Affine2D
	clip     *clipState

	paintKey
}

// clipKey completely describes a clip operation (along with its path) and is
// appropriate for hashing and equality checks.
type clipKey struct {
	bounds      f32.Rectangle
	strokeWidth float32
	relTrans    f32.Affine2D
	pathHash    uint64
}

// paintKey completely defines a paint operation. It is suitable for hashing
// and equality checks.
type paintKey struct {
	t       f32.Affine2D
	matType materialType

	// Current paint.ImageOp.
	image imageOpData
	// Current paint.ColorOp, if any.
	color color.NRGBA

	// Current paint.LinearGradientOp.
	stop1  f32.Point
	stop2  f32.Point
	color1 color.NRGBA
	color2 color.NRGBA
}

type clipState struct {
	absBounds f32.Rectangle
	parent    *clipState
	path      []byte
	pathKey   ops.Key
	intersect f32.Rectangle

	push bool

	clipKey
}

type layerVertex struct {
	posX, posY float32
	u, v       float32
}

// materialVertex describes a vertex of a quad used to render a transformed
// material.
type materialVertex struct {
	posX, posY float32
	u, v       float32
}

// textureKey identifies a textureOp.
type textureKey struct {
	handle    interface{}
	transform f32.Affine2D
	bounds    image.Rectangle
}

// textureOp represents a paintOp that requires texture space.
type textureOp struct {
	img imageOpData
	key textureKey
	// off is the integer offset, separated from key.transform to increase the
	// cache hit rate.
	off image.Point

	// matAlloc is the atlas placement for the material.
	matAlloc materialAlloc
	// imgAlloc is the atlas placement for the source image.
	imgAlloc *atlasAlloc
}

type encoder struct {
	scene    []scene.Command
	npath    int
	npathseg int
	ntrans   int
}

type encodeState struct {
	trans f32.Affine2D
	clip  f32.Rectangle
}

// sizedBuffer holds a GPU buffer, or its equivalent CPU memory.
type sizedBuffer struct {
	size   int
	buffer driver.Buffer
	// cpuBuf is initialized when useCPU is true.
	cpuBuf cpu.BufferDescriptor
}

// computeProgram holds a compute program, or its equivalent CPU implementation.
type computeProgram struct {
	prog driver.Program

	// CPU fields.
	progInfo    *cpu.ProgramInfo
	descriptors unsafe.Pointer
	buffers     []*cpu.BufferDescriptor
}

// config matches Config in setup.h
type config struct {
	n_elements      uint32 // paths
	n_pathseg       uint32
	width_in_tiles  uint32
	height_in_tiles uint32
	tile_alloc      memAlloc
	bin_alloc       memAlloc
	ptcl_alloc      memAlloc
	pathseg_alloc   memAlloc
	anno_alloc      memAlloc
	trans_alloc     memAlloc
}

// memAlloc matches Alloc in mem.h
type memAlloc struct {
	offset uint32
	//size uint32
}

// memoryHeader matches the header of Memory in mem.h.
type memoryHeader struct { mem_offset uint32 mem_error uint32 } // rect is a oriented rectangle. type rectangle [4]f32.Point const ( layersBindings = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead // Materials and layers can share texture storage if their bindings match. combinedBindings = layersBindings | materialsBindings ) // GPU structure sizes and constants. const ( tileWidthPx = 32 tileHeightPx = 32 ptclInitialAlloc = 1024 kernel4OutputUnit = 2 kernel4AtlasUnit = 3 pathSize = 12 binSize = 8 pathsegSize = 52 annoSize = 32 transSize = 24 stateSize = 60 stateStride = 4 + 2*stateSize ) // mem.h constants. const ( memNoError = 0 // NO_ERROR memMallocFailed = 1 // ERR_MALLOC_FAILED ) func newCompute(ctx driver.Device) (*compute, error) { caps := ctx.Caps() maxDim := caps.MaxTextureSize // Large atlas textures cause artifacts due to precision loss in // shaders. if cap := 8192; maxDim > cap { maxDim = cap } // The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the // complexity of a rectangular limit for later. if computeCap := 4096; maxDim > computeCap { maxDim = computeCap } g := &compute{ ctx: ctx, maxTextureDim: maxDim, srgb: caps.Features.Has(driver.FeatureSRGB), conf: new(config), memHeader: new(memoryHeader), } null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead) if err != nil { g.Release() return nil, err } g.output.nullMaterials = null shaders := []struct { prog *computeProgram src shader.Sources info *cpu.ProgramInfo }{ {&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo}, {&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo}, {&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo}, {&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo}, {&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo}, {&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo}, {&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo}, } if !caps.Features.Has(driver.FeatureCompute) { if !cpu.Supported { return nil, errors.New("gpu: missing support for compute programs") } g.useCPU = true } if g.useCPU { g.dispatcher = newDispatcher(runtime.NumCPU()) } copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag) if err != nil { g.Release() return nil, err } defer copyVert.Release() defer copyFrag.Release() pipe, err := ctx.NewPipeline(driver.PipelineDesc{ VertexShader: copyVert, FragmentShader: copyFrag, VertexLayout: driver.VertexLayout{ Inputs: []driver.InputDesc{ {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, }, Stride: int(unsafe.Sizeof(g.output.layerVertices[0])), }, PixelFormat: driver.TextureFormatOutput, BlendDesc: driver.BlendDesc{ Enable: true, SrcFactor: driver.BlendFactorOne, DstFactor: driver.BlendFactorOneMinusSrcAlpha, }, Topology: driver.TopologyTriangles, }) if err != nil { g.Release() return nil, err } g.output.blitPipeline = pipe g.output.uniforms = new(copyUniforms) buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms))) if err != nil { g.Release() return nil, err } g.output.uniBuf = buf materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag) if err != 
nil { g.Release() return nil, err } defer materialVert.Release() defer materialFrag.Release() pipe, err = ctx.NewPipeline(driver.PipelineDesc{ VertexShader: materialVert, FragmentShader: materialFrag, VertexLayout: driver.VertexLayout{ Inputs: []driver.InputDesc{ {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, }, Stride: int(unsafe.Sizeof(g.materials.quads[0])), }, PixelFormat: driver.TextureFormatRGBA8, Topology: driver.TopologyTriangles, }) if err != nil { g.Release() return nil, err } g.materials.pipeline = pipe g.materials.uniforms.u = new(materialUniforms) buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u))) if err != nil { g.Release() return nil, err } g.materials.uniforms.buf = buf for _, shader := range shaders { if !g.useCPU { p, err := ctx.NewComputeProgram(shader.src) if err != nil { g.Release() return nil, err } shader.prog.prog = p } else { shader.prog.progInfo = shader.info } } if g.useCPU { { desc := new(piet.ElementsDescriptorSetLayout) g.programs.elements.descriptors = unsafe.Pointer(desc) g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()} } { desc := new(piet.Tile_allocDescriptorSetLayout) g.programs.tileAlloc.descriptors = unsafe.Pointer(desc) g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} } { desc := new(piet.Path_coarseDescriptorSetLayout) g.programs.pathCoarse.descriptors = unsafe.Pointer(desc) g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} } { desc := new(piet.BackdropDescriptorSetLayout) g.programs.backdrop.descriptors = unsafe.Pointer(desc) g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} } { desc := new(piet.BinningDescriptorSetLayout) g.programs.binning.descriptors = unsafe.Pointer(desc) g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} } { desc := new(piet.CoarseDescriptorSetLayout) g.programs.coarse.descriptors = unsafe.Pointer(desc) g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} } { desc := new(piet.Kernel4DescriptorSetLayout) g.programs.kernel4.descriptors = unsafe.Pointer(desc) g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} g.output.descriptors = desc } } return g, nil } func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { vert, err = ctx.NewVertexShader(vsrc) if err != nil { return } frag, err = ctx.NewFragmentShader(fsrc) if err != nil { vert.Release() } return } func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error { g.frameCount++ g.collect(viewport, frameOps) return g.frame(target) } func (g *compute) collect(viewport image.Point, ops *op.Ops) { g.viewport = viewport g.collector.reset() g.texOps = g.texOps[:0] g.collector.collect(ops, viewport, &g.texOps) } func (g *compute) Clear(col color.NRGBA) { g.collector.clear = true g.collector.clearColor = f32color.LinearFromSRGB(col) } func (g *compute) frame(target RenderTarget) error { viewport := g.viewport defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport) defer g.ctx.EndFrame() t := &g.timers if g.collector.profile && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) { t.t = newTimers(g.ctx) t.compact = t.t.newTimer() t.render = t.t.newTimer() t.blit = 
t.t.newTimer() } if err := g.uploadImages(); err != nil { return err } if err := g.renderMaterials(); err != nil { return err } g.layer(viewport, g.texOps) t.render.begin() if err := g.renderLayers(viewport); err != nil { return err } t.render.end() d := driver.LoadDesc{ ClearColor: g.collector.clearColor, } if g.collector.clear { g.collector.clear = false d.Action = driver.LoadActionClear } t.blit.begin() g.blitLayers(d, defFBO, viewport) t.blit.end() t.compact.begin() if err := g.compactAllocs(); err != nil { return err } t.compact.end() if g.collector.profile && t.t.ready() { com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed ft := com + ren + blit q := 100 * time.Microsecond ft = ft.Round(q) com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q) t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit) } return nil } func (g *compute) dumpAtlases() { for i, a := range g.atlases { dump := image.NewRGBA(image.Rectangle{Max: a.size}) err := driver.DownloadImage(g.ctx, a.image, dump) if err != nil { panic(err) } nrgba := image.NewNRGBA(dump.Bounds()) draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src) var buf bytes.Buffer if err := png.Encode(&buf, nrgba); err != nil { panic(err) } if err := ioutil.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil { panic(err) } } } func (g *compute) Profile() string { return g.timers.profile } func (g *compute) compactAllocs() error { const ( maxAllocAge = 3 maxAtlasAge = 10 ) atlases := g.atlases for _, a := range atlases { if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge { a.compact = true } } for len(atlases) > 0 { var ( dstAtlas *textureAtlas format driver.TextureFormat bindings driver.BufferBinding ) g.moves = g.moves[:0] addedLayers := false useCPU := false fill: for len(atlases) > 0 { srcAtlas := atlases[0] allocs := srcAtlas.allocs if !srcAtlas.compact { atlases = atlases[1:] continue } if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) { break } format = srcAtlas.format bindings = srcAtlas.bindings for len(srcAtlas.allocs) > 0 { a := srcAtlas.allocs[0] n := len(srcAtlas.allocs) if g.frameCount-a.frameCount > maxAllocAge { a.dead = true srcAtlas.allocs[0] = srcAtlas.allocs[n-1] srcAtlas.allocs = srcAtlas.allocs[:n-1] continue } size := a.rect.Size() alloc, fits := g.atlasAlloc(allocQuery{ atlas: dstAtlas, size: size, format: format, bindings: bindings, nocompact: true, }) if !fits { break fill } dstAtlas = alloc.atlas allocs = append(allocs, a) addedLayers = true useCPU = useCPU || a.cpu dstAtlas.allocs = append(dstAtlas.allocs, a) pos := alloc.rect.Min g.moves = append(g.moves, atlasMove{ src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu, }) a.atlas = dstAtlas a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())} srcAtlas.allocs[0] = srcAtlas.allocs[n-1] srcAtlas.allocs = srcAtlas.allocs[:n-1] } srcAtlas.compact = false srcAtlas.realized = false srcAtlas.packer.clear() srcAtlas.packer.newPage() srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) atlases = atlases[1:] } if !addedLayers { break } outputSize := dstAtlas.packer.sizes[0] if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil { return err } for _, move := range g.moves { if !move.cpu { g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, move.srcRect) } else { src := move.src.cpuImage.Data() dst := dstAtlas.cpuImage.Data() sstride := move.src.size.X * 4 dstride := dstAtlas.size.X * 4 
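	// For CPU-backed atlases the move is a plain row-by-row copy between the
	// staging images; GPU-backed atlases were copied with CopyTexture above.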
copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect) } } } for i := len(g.atlases) - 1; i >= 0; i-- { a := g.atlases[i] if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge { a.Release() n := len(g.atlases) g.atlases[i] = g.atlases[n-1] g.atlases = g.atlases[:n-1] } } return nil } func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) { sz := srcRect.Size() soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4 doff := dstPos.Y*dstStride + dstPos.X*4 rowLen := sz.X * 4 for y := 0; y < sz.Y; y++ { srow := src[soff : soff+rowLen] drow := dst[doff : doff+rowLen] copy(drow, srow) soff += srcStride doff += dstStride } } func (g *compute) renderLayers(viewport image.Point) error { layers := g.collector.frame.layers for len(layers) > 0 { var materials, dst *textureAtlas addedLayers := false g.enc.reset() for len(layers) > 0 { l := &layers[0] if l.alloc != nil { layers = layers[1:] continue } if materials != nil { if l.materials != nil && materials != l.materials { // Only one materials texture per compute pass. break } } else { materials = l.materials } size := l.rect.Size() alloc, fits := g.atlasAlloc(allocQuery{ atlas: dst, empty: true, format: driver.TextureFormatRGBA8, bindings: combinedBindings, // Pad to avoid overlap. size: size.Add(image.Pt(1, 1)), }) if !fits { // Only one output atlas per compute pass. break } dst = alloc.atlas dst.compact = true addedLayers = true l.alloc = &alloc dst.allocs = append(dst.allocs, l.alloc) encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps) layers = layers[1:] } if !addedLayers { break } outputSize := dst.packer.sizes[0] tileDims := image.Point{ X: (outputSize.X + tileWidthPx - 1) / tileWidthPx, Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx, } w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil { return err } if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil { return err } } return nil } func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) { layers := g.collector.frame.layers g.output.layerVertices = g.output.layerVertices[:0] for _, l := range layers { placef := layout.FPt(l.alloc.rect.Min) sizef := layout.FPt(l.rect.Size()) r := layout.FRect(l.rect) quad := [4]layerVertex{ {posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y}, {posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y}, {posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y}, {posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y}, } g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1]) g.ctx.PrepareTexture(l.alloc.atlas.image) } if len(g.output.layerVertices) > 0 { vertexData := byteslice.Slice(g.output.layerVertices) g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData)) g.output.buffer.buffer.Upload(vertexData) } g.ctx.BeginRenderPass(fbo, d) defer g.ctx.EndRenderPass() if len(layers) == 0 { return } g.ctx.Viewport(0, 0, viewport.X, viewport.Y) g.ctx.BindPipeline(g.output.blitPipeline) g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0) start := 0 for len(layers) > 0 { count := 0 atlas := layers[0].alloc.atlas for len(layers) > 0 { l := layers[0] if l.alloc.atlas != atlas { break } layers = layers[1:] const verticesPerQuad = 6 count += 
verticesPerQuad } // Transform positions to clip space: [-1, -1] - [1, 1], and texture // coordinates to texture space: [0, 0] - [1, 1]. clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1)) sx, _, ox, _, sy, oy := clip.Elems() g.output.uniforms.scale = [2]float32{sx, sy} g.output.uniforms.pos = [2]float32{ox, oy} g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)} g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms)) g.ctx.BindUniforms(g.output.uniBuf) g.ctx.BindTexture(0, atlas.image) g.ctx.DrawArrays(start, count) start += count } } func (g *compute) renderMaterials() error { m := &g.materials for k, place := range m.allocs { if place.alloc.dead { delete(m.allocs, k) } } texOps := g.texOps for len(texOps) > 0 { m.quads = m.quads[:0] var ( atlas *textureAtlas imgAtlas *textureAtlas ) // A material is clipped to avoid drawing outside its atlas bounds. // However, imprecision in the clipping may cause a single pixel // overflow. var padding = image.Pt(1, 1) var allocStart int for len(texOps) > 0 { op := &texOps[0] if a, exists := m.allocs[op.key]; exists { g.touchAlloc(a.alloc) op.matAlloc = a texOps = texOps[1:] continue } if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas { // Only one image atlas per render pass. break } imgAtlas = op.imgAlloc.atlas quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min) boundsf := quadBounds(quad) bounds := boundRectF(boundsf) bounds = bounds.Intersect(op.key.bounds) size := bounds.Size() alloc, fits := g.atlasAlloc(allocQuery{ atlas: atlas, size: size.Add(padding), format: driver.TextureFormatRGBA8, bindings: combinedBindings, }) if !fits { break } if atlas == nil { allocStart = len(alloc.atlas.allocs) } atlas = alloc.atlas alloc.cpu = g.useCPU offsetf := layout.FPt(bounds.Min.Mul(-1)) scale := f32.Pt(float32(size.X), float32(size.Y)) for i := range quad { // Position quad to match place. quad[i].posX += offsetf.X quad[i].posY += offsetf.Y // Scale to match viewport [0, 1]. quad[i].posX /= scale.X quad[i].posY /= scale.Y } // Draw quad as two triangles. m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2]) if m.allocs == nil { m.allocs = make(map[textureKey]materialAlloc) } atlasAlloc := materialAlloc{ alloc: &alloc, offset: bounds.Min.Mul(-1), } atlas.allocs = append(atlas.allocs, atlasAlloc.alloc) m.allocs[op.key] = atlasAlloc op.matAlloc = atlasAlloc texOps = texOps[1:] } if len(m.quads) == 0 { break } realized := atlas.realized if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil { return err } // Transform to clip space: [-1, -1] - [1, 1]. 
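	// The material quads were normalized to [0, 1] above, so a scale of (2, 2)
	// and an offset of (-1, -1) map them onto the full clip-space range.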
*m.uniforms.u = materialUniforms{ scale: [2]float32{2, 2}, pos: [2]float32{-1, -1}, } if !g.srgb { m.uniforms.u.emulatesRGB = 1.0 } m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u)) vertexData := byteslice.Slice(m.quads) n := pow2Ceil(len(vertexData)) m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n) m.buffer.buffer.Upload(vertexData) var d driver.LoadDesc if !realized { d.Action = driver.LoadActionClear } g.ctx.PrepareTexture(imgAtlas.image) g.ctx.BeginRenderPass(atlas.image, d) g.ctx.BindTexture(0, imgAtlas.image) g.ctx.BindPipeline(m.pipeline) g.ctx.BindUniforms(m.uniforms.buf) g.ctx.BindVertexBuffer(m.buffer.buffer, 0) newAllocs := atlas.allocs[allocStart:] for i, a := range newAllocs { sz := a.rect.Size().Sub(padding) g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y) g.ctx.DrawArrays(i*6, 6) } g.ctx.EndRenderPass() if !g.useCPU { continue } src := atlas.image data := atlas.cpuImage.Data() for _, a := range newAllocs { stride := atlas.size.X * 4 col := a.rect.Min.X * 4 row := stride * a.rect.Min.Y off := col + row src.ReadPixels(a.rect, data[off:], stride) } } return nil } func (g *compute) uploadImages() error { for k, a := range g.imgAllocs { if a.dead { delete(g.imgAllocs, k) } } type upload struct { pos image.Point img *image.RGBA } var uploads []upload format := driver.TextureFormatSRGBA if !g.srgb { format = driver.TextureFormatRGBA8 } // padding is the number of pixels added to the right and below // images, to avoid atlas filtering artifacts. const padding = 1 texOps := g.texOps for len(texOps) > 0 { uploads = uploads[:0] var atlas *textureAtlas for len(texOps) > 0 { op := &texOps[0] if a, exists := g.imgAllocs[op.img.handle]; exists { g.touchAlloc(a) op.imgAlloc = a texOps = texOps[1:] continue } size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding)) alloc, fits := g.atlasAlloc(allocQuery{ atlas: atlas, size: size, format: format, bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer, }) if !fits { break } atlas = alloc.atlas if g.imgAllocs == nil { g.imgAllocs = make(map[interface{}]*atlasAlloc) } op.imgAlloc = &alloc atlas.allocs = append(atlas.allocs, op.imgAlloc) g.imgAllocs[op.img.handle] = op.imgAlloc uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src}) texOps = texOps[1:] } if len(uploads) == 0 { break } if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil { return err } for _, u := range uploads { size := u.img.Bounds().Size() driver.UploadImage(atlas.image, u.pos, u.img) rightPadding := image.Pt(padding, size.Y) atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0) bottomPadding := image.Pt(size.X, padding) atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0) } } return nil } func pow2Ceil(v int) int { exp := bits.Len(uint(v)) if bits.OnesCount(uint(v)) == 1 { exp-- } return 1 << exp } // materialQuad constructs a quad that represents the transformed image. It returns the quad // and its bounds. func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex { imgSize := layout.FPt(img.src.Bounds().Size()) sx, hx, ox, hy, sy, oy := M.Elems() transOff := f32.Pt(ox, oy) // The 4 corners of the image rectangle transformed by M, excluding its offset, are: // // q0: M * (0, 0) q3: M * (w, 0) // q1: M * (0, h) q2: M * (w, h) // // Note that q0 = M*0 = 0, q2 = q1 + q3. 
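	// With M.Elems() returning (sx, hx, ox, hy, sy, oy) and the offset (ox, oy)
	// dropped, q1 = (hx*h, sy*h) and q3 = (sx*w, hy*w); the offset is added
	// back to all four corners below.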
q0 := f32.Pt(0, 0) q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y) q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X) q2 := q1.Add(q3) q0 = q0.Add(transOff) q1 = q1.Add(transOff) q2 = q2.Add(transOff) q3 = q3.Add(transOff) uvPosf := layout.FPt(uvPos) atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y)) uvBounds := f32.Rectangle{ Min: uvPosf, Max: uvPosf.Add(imgSize), } uvBounds.Min.X *= atlasScale.X uvBounds.Min.Y *= atlasScale.Y uvBounds.Max.X *= atlasScale.X uvBounds.Max.Y *= atlasScale.Y quad := [4]materialVertex{ {posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y}, {posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y}, {posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y}, {posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y}, } return quad } func quadBounds(q [4]materialVertex) f32.Rectangle { q0 := f32.Pt(q[0].posX, q[0].posY) q1 := f32.Pt(q[1].posX, q[1].posY) q2 := f32.Pt(q[2].posX, q[2].posY) q3 := f32.Pt(q[3].posX, q[3].posY) return f32.Rectangle{ Min: min(min(q0, q1), min(q2, q3)), Max: max(max(q0, q1), max(q2, q3)), } } func max(p1, p2 f32.Point) f32.Point { p := p1 if p2.X > p.X { p.X = p2.X } if p2.Y > p.Y { p.Y = p2.Y } return p } func min(p1, p2 f32.Point) f32.Point { p := p1 if p2.X < p.X { p.X = p2.X } if p2.Y < p.Y { p.Y = p2.Y } return p } func (enc *encoder) encodePath(verts []byte, fillMode int) { for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] { cmd := ops.DecodeCommand(verts[4:]) if cmd.Op() == scene.OpGap { if fillMode != scene.FillModeNonzero { // Skip gaps in strokes. continue } // Replace them by a straight line in outlines. cmd = scene.Line(scene.DecodeGap(cmd)) } enc.scene = append(enc.scene, cmd) enc.npathseg++ } } func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error { const ( // wgSize is the largest and most common workgroup size. wgSize = 128 // PARTITION_SIZE from elements.comp partitionSize = 32 * 4 ) widthInBins := (tileDims.X + 15) / 16 heightInBins := (tileDims.Y + 7) / 8 if widthInBins*heightInBins > wgSize { return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx) } enc := &g.enc // Pad scene with zeroes to avoid reading garbage in elements.comp. scenePadding := partitionSize - len(enc.scene)%partitionSize enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...) scene := byteslice.Slice(enc.scene) if s := len(scene); s > g.buffers.scene.size { paddedCap := s * 11 / 10 if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil { return err } } g.buffers.scene.upload(scene) // alloc is the number of allocated bytes for static buffers. 
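	// The static allocations below fill in the memAlloc fields of config; the
	// compute shaders allocate dynamically from the memory buffer starting at
	// mem_offset, which is initialized to the final value of alloc.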
var alloc uint32 round := func(v, quantum int) int { return (v + quantum - 1) &^ (quantum - 1) } malloc := func(size int) memAlloc { size = round(size, 4) offset := alloc alloc += uint32(size) return memAlloc{offset /*, uint32(size)*/} } *g.conf = config{ n_elements: uint32(enc.npath), n_pathseg: uint32(enc.npathseg), width_in_tiles: uint32(tileDims.X), height_in_tiles: uint32(tileDims.Y), tile_alloc: malloc(enc.npath * pathSize), bin_alloc: malloc(round(enc.npath, wgSize) * binSize), ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc), pathseg_alloc: malloc(enc.npathseg * pathsegSize), anno_alloc: malloc(enc.npath * annoSize), trans_alloc: malloc(enc.ntrans * transSize), } numPartitions := (enc.numElements() + 127) / 128 // clearSize is the atomic partition counter plus flag and 2 states per partition. clearSize := 4 + numPartitions*stateStride if clearSize > g.buffers.state.size { paddedCap := clearSize * 11 / 10 if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil { return err } } confData := byteslice.Struct(g.conf) g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData)) g.buffers.config.upload(confData) minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc) if minSize > g.buffers.memory.size { // Add space for dynamic GPU allocations. const sizeBump = 4 * 1024 * 1024 minSize += sizeBump if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil { return err } } for { *g.memHeader = memoryHeader{ mem_offset: alloc, } g.buffers.memory.upload(byteslice.Struct(g.memHeader)) g.buffers.state.upload(g.zeros(clearSize)) if !g.useCPU { g.ctx.BeginCompute() g.ctx.BindImageTexture(kernel4OutputUnit, dst) img := g.output.nullMaterials if images != nil { img = images.image } g.ctx.BindImageTexture(kernel4AtlasUnit, img) } else { *g.output.descriptors.Binding2() = cpuDst if images != nil { *g.output.descriptors.Binding3() = images.cpuImage } } g.bindBuffers() g.memoryBarrier() g.dispatch(g.programs.elements, numPartitions, 1, 1) g.memoryBarrier() g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1) g.memoryBarrier() g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1) g.memoryBarrier() g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1) // No barrier needed between backdrop and binning. g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1) g.memoryBarrier() g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1) g.memoryBarrier() g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1) g.memoryBarrier() if !g.useCPU { g.ctx.EndCompute() } else { g.dispatcher.Sync() } if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil { if err == driver.ErrContentLost { continue } return err } switch errCode := g.memHeader.mem_error; errCode { case memNoError: if g.useCPU { w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride) } return nil case memMallocFailed: // Resize memory and try again. 
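	// Grow the memory buffer by 50% and run the whole pipeline again; the
	// loop repeats until the dynamic allocations fit.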
sz := g.buffers.memory.size * 15 / 10 if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil { return err } continue default: return fmt.Errorf("compute: shader program failed with error %d", errCode) } } } func (g *compute) memoryBarrier() { if g.useCPU { g.dispatcher.Barrier() } } func (g *compute) dispatch(p computeProgram, x, y, z int) { if !g.useCPU { g.ctx.BindProgram(p.prog) g.ctx.DispatchCompute(x, y, z) } else { g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z) } } // zeros returns a byte slice with size bytes of zeros. func (g *compute) zeros(size int) []byte { if cap(g.zeroSlice) < size { g.zeroSlice = append(g.zeroSlice, make([]byte, size)...) } return g.zeroSlice[:size] } func (g *compute) touchAlloc(a *atlasAlloc) { if a.dead { panic("re-use of dead allocation") } a.frameCount = g.frameCount a.atlas.lastFrame = a.frameCount } func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) { var ( place placement fits bool atlas = q.atlas ) if atlas != nil { place, fits = atlas.packer.tryAdd(q.size) if !fits { atlas.compact = true } } if atlas == nil { // Look for matching atlas to re-use. for _, a := range g.atlases { if q.empty && len(a.allocs) > 0 { continue } if q.nocompact && a.compact { continue } if a.format != q.format || a.bindings&q.bindings != q.bindings { continue } place, fits = a.packer.tryAdd(q.size) if !fits { a.compact = true continue } atlas = a break } } if atlas == nil { atlas = &textureAtlas{ format: q.format, bindings: q.bindings, } atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) atlas.packer.newPage() g.atlases = append(g.atlases, atlas) place, fits = atlas.packer.tryAdd(q.size) if !fits { panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size)) } } if !fits { return atlasAlloc{}, false } atlas.lastFrame = g.frameCount return atlasAlloc{ frameCount: g.frameCount, atlas: atlas, rect: image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)}, }, true } func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error { defer func() { atlas.packer.maxDims = atlas.size atlas.realized = true atlas.ensureCPUImage(useCPU) }() if atlas.size.X >= size.X && atlas.size.Y >= size.Y { return nil } if atlas.realized { panic("resizing a realized atlas") } if err := atlas.resize(g.ctx, size); err != nil { return err } return nil } func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error { a.Release() img, err := ctx.NewTexture(a.format, size.X, size.Y, driver.FilterNearest, driver.FilterNearest, a.bindings) if err != nil { return err } a.image = img a.size = size return nil } func (a *textureAtlas) ensureCPUImage(useCPU bool) { if !useCPU || a.hasCPU { return } a.hasCPU = true a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y) } func (g *compute) Release() { if g.useCPU { g.dispatcher.Stop() } type resource interface { Release() } res := []resource{ g.output.nullMaterials, &g.programs.elements, &g.programs.tileAlloc, &g.programs.pathCoarse, &g.programs.backdrop, &g.programs.binning, &g.programs.coarse, &g.programs.kernel4, g.output.blitPipeline, &g.output.buffer, g.output.uniBuf, &g.buffers.scene, &g.buffers.state, &g.buffers.memory, &g.buffers.config, g.materials.pipeline, &g.materials.buffer, g.materials.uniforms.buf, g.timers.t, } for _, r := range res { if r != nil { r.Release() } } for _, a := range g.atlases { a.Release() } g.ctx.Release() *g = compute{} } func (a *textureAtlas) Release() { 
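	// Free the GPU texture, if any, along with the CPU staging image.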
if a.image != nil { a.image.Release() a.image = nil } a.cpuImage.Free() a.hasCPU = false } func (g *compute) bindBuffers() { g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state) g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config) g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config) g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config) g.bindStorageBuffers(g.programs.binning, g.buffers.memory, g.buffers.config) g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config) g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config) } func (p *computeProgram) Release() { if p.prog != nil { p.prog.Release() } *p = computeProgram{} } func (b *sizedBuffer) Release() { if b.buffer != nil { b.buffer.Release() } b.cpuBuf.Free() *b = sizedBuffer{} } func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error { if b.size >= size { return nil } if b.buffer != nil { b.Release() } b.cpuBuf.Free() if !useCPU { buf, err := ctx.NewBuffer(binding, size) if err != nil { return err } b.buffer = buf } else { b.cpuBuf = cpu.NewBuffer(size) } b.size = size return nil } func (b *sizedBuffer) download(data []byte) error { if b.buffer != nil { return b.buffer.Download(data) } else { copy(data, b.cpuBuf.Data()) return nil } } func (b *sizedBuffer) upload(data []byte) { if b.buffer != nil { b.buffer.Upload(data) } else { copy(b.cpuBuf.Data(), data) } } func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) { for i, buf := range buffers { if !g.useCPU { g.ctx.BindStorageBuffer(i, buf.buffer) } else { *prog.buffers[i] = buf.cpuBuf } } } var bo = binary.LittleEndian func (e *encoder) reset() { e.scene = e.scene[:0] e.npath = 0 e.npathseg = 0 e.ntrans = 0 } func (e *encoder) numElements() int { return len(e.scene) } func (e *encoder) append(e2 encoder) { e.scene = append(e.scene, e2.scene...) e.npath += e2.npath e.npathseg += e2.npathseg e.ntrans += e2.ntrans } func (e *encoder) transform(m f32.Affine2D) { e.scene = append(e.scene, scene.Transform(m)) e.ntrans++ } func (e *encoder) lineWidth(width float32) { e.scene = append(e.scene, scene.SetLineWidth(width)) } func (e *encoder) fillMode(mode scene.FillMode) { e.scene = append(e.scene, scene.SetFillMode(mode)) } func (e *encoder) beginClip(bbox f32.Rectangle) { e.scene = append(e.scene, scene.BeginClip(bbox)) e.npath++ } func (e *encoder) endClip(bbox f32.Rectangle) { e.scene = append(e.scene, scene.EndClip(bbox)) e.npath++ } func (e *encoder) rect(r f32.Rectangle) { // Rectangle corners, clock-wise. 
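	// The four edges are emitted as line segments forming a closed loop, which
	// works for both nonzero fills and strokes.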
c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y) e.line(c0, c1) e.line(c1, c2) e.line(c2, c3) e.line(c3, c0) } func (e *encoder) fillColor(col color.RGBA) { e.scene = append(e.scene, scene.FillColor(col)) e.npath++ } func (e *encoder) fillImage(index int, offset image.Point) { e.scene = append(e.scene, scene.FillImage(index, offset)) e.npath++ } func (e *encoder) line(start, end f32.Point) { e.scene = append(e.scene, scene.Line(start, end)) e.npathseg++ } func (e *encoder) quad(start, ctrl, end f32.Point) { e.scene = append(e.scene, scene.Quad(start, ctrl, end)) e.npathseg++ } func (c *collector) reset() { c.prevFrame, c.frame = c.frame, c.prevFrame c.profile = false c.clipStates = c.clipStates[:0] c.transStack = c.transStack[:0] c.frame.reset() } func (c *opsCollector) reset() { c.paths = c.paths[:0] c.clipCmds = c.clipCmds[:0] c.ops = c.ops[:0] c.layers = c.layers[:0] } func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) { // Rectangle clip regions. if len(path) == 0 && !push { // If the rectangular clip region contains a previous path it can be discarded. p := state.clip t := state.relTrans.Invert() for p != nil { // rect is the parent bounds transformed relative to the rectangle. rect := transformBounds(t, p.bounds) if rect.In(bounds) { return } t = p.relTrans.Invert().Mul(t) p = p.parent } } absBounds := transformBounds(state.t, bounds).Bounds() intersect := absBounds if state.clip != nil { intersect = state.clip.intersect.Intersect(intersect) } c.clipStates = append(c.clipStates, clipState{ parent: state.clip, absBounds: absBounds, path: path, pathKey: key, intersect: intersect, clipKey: clipKey{ bounds: bounds, relTrans: state.relTrans, strokeWidth: strokeWidth, pathHash: hash, }, }) state.clip = &c.clipStates[len(c.clipStates)-1] state.relTrans = f32.Affine2D{} } func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) { fview := f32.Rectangle{Max: layout.FPt(viewport)} var intOps *ops.Ops if root != nil { intOps = &root.Internal } c.reader.Reset(intOps) var state encoderState reset := func() { state = encoderState{ paintKey: paintKey{ color: color.NRGBA{A: 0xff}, }, } } reset() r := &c.reader var ( pathData struct { data []byte key ops.Key hash uint64 } strWidth float32 ) c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false) for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() { switch ops.OpType(encOp.Data[0]) { case ops.TypeProfile: c.profile = true case ops.TypeTransform: dop, push := ops.DecodeTransform(encOp.Data) if push { c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans}) } state.t = state.t.Mul(dop) state.relTrans = state.relTrans.Mul(dop) case ops.TypePopTransform: n := len(c.transStack) st := c.transStack[n-1] c.transStack = c.transStack[:n-1] state.t = st.t state.relTrans = st.relTrans case ops.TypeStroke: strWidth = decodeStrokeOp(encOp.Data) case ops.TypePath: hash := bo.Uint64(encOp.Data[1:]) encOp, ok = r.Decode() if !ok { panic("unexpected end of path operation") } pathData.data = encOp.Data[ops.TypeAuxLen:] pathData.key = encOp.Key pathData.hash = hash case ops.TypeClip: var op ops.ClipOp op.Decode(encOp.Data) bounds := layout.FRect(op.Bounds) c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true) pathData.data = nil strWidth = 0 case ops.TypePopClip: state.relTrans = state.clip.relTrans.Mul(state.relTrans) state.clip = 
state.clip.parent case ops.TypeColor: state.matType = materialColor state.color = decodeColorOp(encOp.Data) case ops.TypeLinearGradient: state.matType = materialLinearGradient op := decodeLinearGradientOp(encOp.Data) state.stop1 = op.stop1 state.stop2 = op.stop2 state.color1 = op.color1 state.color2 = op.color2 case ops.TypeImage: state.matType = materialTexture state.image = decodeImageOp(encOp.Data, encOp.Refs) case ops.TypePaint: paintState := state if paintState.matType == materialTexture { // Clip to the bounds of the image, to hide other images in the atlas. sz := state.image.src.Rect.Size() bounds := f32.Rectangle{Max: layout.FPt(sz)} c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false) } intersect := paintState.clip.intersect if intersect.Empty() { break } // If the paint is a uniform opaque color that takes up the whole // screen, it covers all previous paints and we can discard all // rendering commands recorded so far. if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 { c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque() c.clear = true c.frame.reset() break } // Flatten clip stack. p := paintState.clip startIdx := len(c.frame.clipCmds) for p != nil { idx := len(c.frame.paths) c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...) path := c.frame.paths[idx:] copy(path, p.path) c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{ state: p.clipKey, path: path, pathKey: p.pathKey, absBounds: p.absBounds, }) p = p.parent } clipStack := c.frame.clipCmds[startIdx:] c.frame.ops = append(c.frame.ops, paintOp{ clipStack: clipStack, state: paintState.paintKey, intersect: intersect, }) case ops.TypeSave: id := ops.DecodeSave(encOp.Data) c.save(id, state.t) case ops.TypeLoad: reset() id := ops.DecodeLoad(encOp.Data) state.t = c.states[id] state.relTrans = state.t } } for i := range c.frame.ops { op := &c.frame.ops[i] // For each clip, cull rectangular clip regions that contain its // (transformed) bounds. addClip already handled the converse case. // TODO: do better than O(n²) to efficiently deal with deep stacks. for j := 0; j < len(op.clipStack)-1; j++ { cl := op.clipStack[j] p := cl.state r := transformBounds(p.relTrans, p.bounds) for k := j + 1; k < len(op.clipStack); k++ { cl2 := op.clipStack[k] p2 := cl2.state if len(cl2.path) == 0 && r.In(cl2.state.bounds) { op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...) k-- op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans) } r = transformRect(p2.relTrans, r) } } // Separate the integer offset from the first transform. Two ops that differ // only in integer offsets may share backing storage. if len(op.clipStack) > 0 { c := &op.clipStack[len(op.clipStack)-1] t := c.state.relTrans t, off := separateTransform(t) c.state.relTrans = t op.offset = off op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1))) } op.hash = c.hashOp(*op) op.texOpIdx = -1 switch op.state.matType { case materialTexture: op.texOpIdx = len(*texOps) // Separate integer offset from transformation. TextureOps that have identical transforms // except for their integer offsets can share a transformed image. 
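		// For example, the same image drawn twice with identical scale and
		// rotation but at different whole-pixel positions can map to a single
		// textureKey (provided the clipped bounds match), so its transformed
		// pixels are rendered into the materials atlas only once.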
t := op.state.t.Offset(layout.FPt(op.offset)) t, off := separateTransform(t) bounds := boundRectF(op.intersect).Sub(off) *texOps = append(*texOps, textureOp{ img: op.state.image, off: off, key: textureKey{ bounds: bounds, transform: t, handle: op.state.image.handle, }, }) } } } func (c *collector) hashOp(op paintOp) uint64 { c.hasher.Reset() for _, cl := range op.clipStack { k := cl.state keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) c.hasher.Write(keyBytes[:]) } k := op.state keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) c.hasher.Write(keyBytes[:]) return c.hasher.Sum64() } func (g *compute) layer(viewport image.Point, texOps []textureOp) { // Sort ops from previous frames by hash. c := &g.collector prevOps := c.prevFrame.ops c.order = c.order[:0] for i, op := range prevOps { c.order = append(c.order, hashIndex{ index: i, hash: op.hash, }) } sort.Slice(c.order, func(i, j int) bool { return c.order[i].hash < c.order[j].hash }) // Split layers with different materials atlas; the compute stage has only // one materials slot. splitLayer := func(ops []paintOp, prevLayerIdx int) { for len(ops) > 0 { var materials *textureAtlas idx := 0 for idx < len(ops) { if i := ops[idx].texOpIdx; i != -1 { omats := texOps[i].matAlloc.alloc.atlas if materials != nil && omats != nil && omats != materials { break } materials = omats } idx++ } l := layer{ops: ops[:idx], materials: materials} if prevLayerIdx != -1 { prev := c.prevFrame.layers[prevLayerIdx] if !prev.alloc.dead && len(prev.ops) == len(l.ops) { l.alloc = prev.alloc l.materials = prev.materials g.touchAlloc(l.alloc) } } for i, op := range l.ops { l.rect = l.rect.Union(boundRectF(op.intersect)) l.ops[i].layer = len(c.frame.layers) } c.frame.layers = append(c.frame.layers, l) ops = ops[idx:] } } ops := c.frame.ops idx := 0 for idx < len(ops) { op := ops[idx] // Search for longest matching op sequence. // start is the earliest index of a match. start := searchOp(c.order, op.hash) layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:]) if len(layerOps) == 0 { idx++ continue } if unmatched := ops[:idx]; len(unmatched) > 0 { // Flush layer of unmatched ops. splitLayer(unmatched, -1) ops = ops[idx:] idx = 0 } splitLayer(layerOps, prevLayerIdx) ops = ops[len(layerOps):] } if len(ops) > 0 { splitLayer(ops, -1) } } func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) { longest := 0 longestIdx := -1 outer: for len(order) > 0 { first := order[0] order = order[1:] match := prev[first.index:] // Potential match found. Now find longest matching sequence. end := 0 layer := match[0].layer off := match[0].offset.Sub(ops[0].offset) for end < len(match) && end < len(ops) { m := match[end] o := ops[end] // End layers on previous match. if m.layer != layer { break } // End layer when the next op doesn't match. if m.hash != o.hash { if end == 0 { // Hashes are sorted so if the first op doesn't match, no // more matches are possible. 
break outer } break } if !opEqual(off, m, o) { break } end++ } if end > longest { longest = end longestIdx = layer } } return ops[:longest], longestIdx } func searchOp(order []hashIndex, hash uint64) int { lo, hi := 0, len(order) for lo < hi { mid := (lo + hi) / 2 if order[mid].hash < hash { lo = mid + 1 } else { hi = mid } } return lo } func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool { if len(o1.clipStack) != len(o2.clipStack) { return false } if o1.state != o2.state { return false } if o1.offset.Sub(o2.offset) != off { return false } for i, cl1 := range o1.clipStack { cl2 := o2.clipStack[i] if len(cl1.path) != len(cl2.path) { return false } if cl1.state != cl2.state { return false } if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) { return false } } return true } func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) { off := pos.Sub(l.rect.Min) offf := layout.FPt(off) enc.transform(f32.Affine2D{}.Offset(offf)) for _, op := range l.ops { encodeOp(viewport, off, enc, texOps, op) } enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1))) } func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) { // Fill in clip bounds, which the shaders expect to be the union // of all affected bounds. var union f32.Rectangle for i, cl := range op.clipStack { union = union.Union(cl.absBounds) op.clipStack[i].union = union } absOfff := layout.FPt(absOff) fillMode := scene.FillModeNonzero opOff := layout.FPt(op.offset) inv := f32.Affine2D{}.Offset(opOff) enc.transform(inv) for i := len(op.clipStack) - 1; i >= 0; i-- { cl := op.clipStack[i] if w := cl.state.strokeWidth; w > 0 { enc.fillMode(scene.FillModeStroke) enc.lineWidth(w) fillMode = scene.FillModeStroke } else if fillMode != scene.FillModeNonzero { enc.fillMode(scene.FillModeNonzero) fillMode = scene.FillModeNonzero } enc.transform(cl.state.relTrans) inv = inv.Mul(cl.state.relTrans) if len(cl.path) == 0 { enc.rect(cl.state.bounds) } else { enc.encodePath(cl.path, fillMode) } if i != 0 { enc.beginClip(cl.union.Add(absOfff)) } } if len(op.clipStack) == 0 { // No clipping; fill the entire view. enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}) } switch op.state.matType { case materialTexture: texOp := texOps[op.texOpIdx] off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff) enc.fillImage(0, off) case materialColor: enc.fillColor(f32color.NRGBAToRGBA(op.state.color)) case materialLinearGradient: // TODO: implement. enc.fillColor(f32color.NRGBAToRGBA(op.state.color1)) default: panic("not implemented") } enc.transform(inv.Invert()) // Pop the clip stack, except the first entry used for fill. for i := 1; i < len(op.clipStack); i++ { cl := op.clipStack[i] enc.endClip(cl.union.Add(absOfff)) } if fillMode != scene.FillModeNonzero { enc.fillMode(scene.FillModeNonzero) } } func (c *collector) save(id int, state f32.Affine2D) { if extra := id - len(c.states) + 1; extra > 0 { c.states = append(c.states, make([]f32.Affine2D, extra)...) 
} c.states[id] = state } func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle { return rectangle{ t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)), t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)), } } func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) { sx, hx, ox, hy, sy, oy := t.Elems() intx, fracx := math.Modf(float64(ox)) inty, fracy := math.Modf(float64(oy)) t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy)) return t, image.Pt(int(intx), int(inty)) } func transformRect(t f32.Affine2D, r rectangle) rectangle { var tr rectangle for i, c := range r { tr[i] = t.Transform(c) } return tr } func (r rectangle) In(b f32.Rectangle) bool { for _, c := range r { inside := b.Min.X <= c.X && c.X <= b.Max.X && b.Min.Y <= c.Y && c.Y <= b.Max.Y if !inside { return false } } return true } func (r rectangle) Contains(b f32.Rectangle) bool { return true } func (r rectangle) Bounds() f32.Rectangle { bounds := f32.Rectangle{ Min: f32.Pt(math.MaxFloat32, math.MaxFloat32), Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32), } for _, c := range r { if c.X < bounds.Min.X { bounds.Min.X = c.X } if c.Y < bounds.Min.Y { bounds.Min.Y = c.Y } if c.X > bounds.Max.X { bounds.Max.X = c.X } if c.Y > bounds.Max.Y { bounds.Max.Y = c.Y } } return bounds }
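
// Illustrative sketch (not part of the renderer): how separateTransform splits
// a transform into its sub-pixel part and a whole-pixel offset, assuming the
// definitions above.
//
//	t := f32.Affine2D{}.Offset(f32.Pt(10.25, 7.75))
//	frac, off := separateTransform(t)
//	// off == image.Pt(10, 7); frac keeps the (0.25, 0.75) fractional offset,
//	// so draws that differ only by whole pixels can share one cache entry.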