diff options
Diffstat (limited to 'vendor/gioui.org/gpu/internal')
-rw-r--r-- | vendor/gioui.org/gpu/internal/d3d11/d3d11.go | 5 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/d3d11/d3d11_windows.go | 859 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/driver/api.go | 127 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/driver/driver.go | 237 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/metal/metal.go | 5 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/metal/metal_darwin.go | 1141 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/opengl/opengl.go | 1357 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/opengl/srgb.go | 176 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/vulkan/vulkan.go | 1121 | ||||
-rw-r--r-- | vendor/gioui.org/gpu/internal/vulkan/vulkan_nosupport.go | 5 |
10 files changed, 5033 insertions, 0 deletions
diff --git a/vendor/gioui.org/gpu/internal/d3d11/d3d11.go b/vendor/gioui.org/gpu/internal/d3d11/d3d11.go new file mode 100644 index 0000000..3ddf7c3 --- /dev/null +++ b/vendor/gioui.org/gpu/internal/d3d11/d3d11.go @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +// This file exists so this package builds on non-Windows platforms. + +package d3d11 diff --git a/vendor/gioui.org/gpu/internal/d3d11/d3d11_windows.go b/vendor/gioui.org/gpu/internal/d3d11/d3d11_windows.go new file mode 100644 index 0000000..08698c3 --- /dev/null +++ b/vendor/gioui.org/gpu/internal/d3d11/d3d11_windows.go @@ -0,0 +1,859 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +package d3d11 + +import ( + "errors" + "fmt" + "image" + "math" + "reflect" + "unsafe" + + "golang.org/x/sys/windows" + + "gioui.org/gpu/internal/driver" + "gioui.org/internal/d3d11" + "gioui.org/shader" +) + +type Backend struct { + dev *d3d11.Device + ctx *d3d11.DeviceContext + + // Temporary storage to avoid garbage. + clearColor [4]float32 + viewport d3d11.VIEWPORT + + pipeline *Pipeline + vert struct { + buffer *Buffer + offset int + } + + program *Program + + caps driver.Caps + + floatFormat uint32 +} + +type Pipeline struct { + vert *d3d11.VertexShader + frag *d3d11.PixelShader + layout *d3d11.InputLayout + blend *d3d11.BlendState + stride int + topology driver.Topology +} + +type Texture struct { + backend *Backend + format uint32 + bindings driver.BufferBinding + tex *d3d11.Texture2D + sampler *d3d11.SamplerState + resView *d3d11.ShaderResourceView + uaView *d3d11.UnorderedAccessView + renderTarget *d3d11.RenderTargetView + + width int + height int + foreign bool +} + +type VertexShader struct { + backend *Backend + shader *d3d11.VertexShader + src shader.Sources +} + +type FragmentShader struct { + backend *Backend + shader *d3d11.PixelShader +} + +type Program struct { + backend *Backend + shader *d3d11.ComputeShader +} + +type Buffer struct { + backend *Backend + bind uint32 + buf *d3d11.Buffer + 
resView *d3d11.ShaderResourceView + uaView *d3d11.UnorderedAccessView + size int + immutable bool +} + +func init() { + driver.NewDirect3D11Device = newDirect3D11Device +} + +func detectFloatFormat(dev *d3d11.Device) (uint32, bool) { + formats := []uint32{ + d3d11.DXGI_FORMAT_R16_FLOAT, + d3d11.DXGI_FORMAT_R32_FLOAT, + d3d11.DXGI_FORMAT_R16G16_FLOAT, + d3d11.DXGI_FORMAT_R32G32_FLOAT, + // These last two are really wasteful, but c'est la vie. + d3d11.DXGI_FORMAT_R16G16B16A16_FLOAT, + d3d11.DXGI_FORMAT_R32G32B32A32_FLOAT, + } + for _, format := range formats { + need := uint32(d3d11.FORMAT_SUPPORT_TEXTURE2D | d3d11.FORMAT_SUPPORT_RENDER_TARGET) + if support, _ := dev.CheckFormatSupport(format); support&need == need { + return format, true + } + } + return 0, false +} + +func newDirect3D11Device(api driver.Direct3D11) (driver.Device, error) { + dev := (*d3d11.Device)(api.Device) + b := &Backend{ + dev: dev, + ctx: dev.GetImmediateContext(), + caps: driver.Caps{ + MaxTextureSize: 2048, // 9.1 maximum + Features: driver.FeatureSRGB, + }, + } + featLvl := dev.GetFeatureLevel() + switch { + case featLvl < d3d11.FEATURE_LEVEL_9_1: + d3d11.IUnknownRelease(unsafe.Pointer(dev), dev.Vtbl.Release) + d3d11.IUnknownRelease(unsafe.Pointer(b.ctx), b.ctx.Vtbl.Release) + return nil, fmt.Errorf("d3d11: feature level too low: %d", featLvl) + case featLvl >= d3d11.FEATURE_LEVEL_11_0: + b.caps.MaxTextureSize = 16384 + b.caps.Features |= driver.FeatureCompute + case featLvl >= d3d11.FEATURE_LEVEL_9_3: + b.caps.MaxTextureSize = 4096 + } + if fmt, ok := detectFloatFormat(dev); ok { + b.floatFormat = fmt + b.caps.Features |= driver.FeatureFloatRenderTargets + } + // Disable backface culling to match OpenGL. 
+ state, err := dev.CreateRasterizerState(&d3d11.RASTERIZER_DESC{ + CullMode: d3d11.CULL_NONE, + FillMode: d3d11.FILL_SOLID, + }) + if err != nil { + return nil, err + } + defer d3d11.IUnknownRelease(unsafe.Pointer(state), state.Vtbl.Release) + b.ctx.RSSetState(state) + return b, nil +} + +func (b *Backend) BeginFrame(target driver.RenderTarget, clear bool, viewport image.Point) driver.Texture { + var ( + renderTarget *d3d11.RenderTargetView + ) + if target != nil { + switch t := target.(type) { + case driver.Direct3D11RenderTarget: + renderTarget = (*d3d11.RenderTargetView)(t.RenderTarget) + case *Texture: + renderTarget = t.renderTarget + default: + panic(fmt.Errorf("d3d11: invalid render target type: %T", target)) + } + } + b.ctx.OMSetRenderTargets(renderTarget, nil) + return &Texture{backend: b, renderTarget: renderTarget, foreign: true} +} + +func (b *Backend) CopyTexture(dstTex driver.Texture, dstOrigin image.Point, srcTex driver.Texture, srcRect image.Rectangle) { + dst := (*d3d11.Resource)(unsafe.Pointer(dstTex.(*Texture).tex)) + src := (*d3d11.Resource)(srcTex.(*Texture).tex) + b.ctx.CopySubresourceRegion( + dst, + 0, // Destination subresource. + uint32(dstOrigin.X), uint32(dstOrigin.Y), 0, // Destination coordinates (x, y, z). + src, + 0, // Source subresource. 
+ &d3d11.BOX{ + Left: uint32(srcRect.Min.X), + Top: uint32(srcRect.Min.Y), + Right: uint32(srcRect.Max.X), + Bottom: uint32(srcRect.Max.Y), + Front: 0, + Back: 1, + }, + ) +} + +func (b *Backend) EndFrame() { +} + +func (b *Backend) Caps() driver.Caps { + return b.caps +} + +func (b *Backend) NewTimer() driver.Timer { + panic("timers not supported") +} + +func (b *Backend) IsTimeContinuous() bool { + panic("timers not supported") +} + +func (b *Backend) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(b.ctx), b.ctx.Vtbl.Release) + *b = Backend{} +} + +func (b *Backend) NewTexture(format driver.TextureFormat, width, height int, minFilter, magFilter driver.TextureFilter, bindings driver.BufferBinding) (driver.Texture, error) { + var d3dfmt uint32 + switch format { + case driver.TextureFormatFloat: + d3dfmt = b.floatFormat + case driver.TextureFormatSRGBA: + d3dfmt = d3d11.DXGI_FORMAT_R8G8B8A8_UNORM_SRGB + case driver.TextureFormatRGBA8: + d3dfmt = d3d11.DXGI_FORMAT_R8G8B8A8_UNORM + default: + return nil, fmt.Errorf("unsupported texture format %d", format) + } + tex, err := b.dev.CreateTexture2D(&d3d11.TEXTURE2D_DESC{ + Width: uint32(width), + Height: uint32(height), + MipLevels: 1, + ArraySize: 1, + Format: d3dfmt, + SampleDesc: d3d11.DXGI_SAMPLE_DESC{ + Count: 1, + Quality: 0, + }, + BindFlags: convBufferBinding(bindings), + }) + if err != nil { + return nil, err + } + var ( + sampler *d3d11.SamplerState + resView *d3d11.ShaderResourceView + uaView *d3d11.UnorderedAccessView + fbo *d3d11.RenderTargetView + ) + if bindings&driver.BufferBindingTexture != 0 { + var filter uint32 + switch { + case minFilter == driver.FilterNearest && magFilter == driver.FilterNearest: + filter = d3d11.FILTER_MIN_MAG_MIP_POINT + case minFilter == driver.FilterLinear && magFilter == driver.FilterLinear: + filter = d3d11.FILTER_MIN_MAG_LINEAR_MIP_POINT + default: + d3d11.IUnknownRelease(unsafe.Pointer(tex), tex.Vtbl.Release) + return nil, fmt.Errorf("unsupported texture filter 
combination %d, %d", minFilter, magFilter) + } + var err error + sampler, err = b.dev.CreateSamplerState(&d3d11.SAMPLER_DESC{ + Filter: filter, + AddressU: d3d11.TEXTURE_ADDRESS_CLAMP, + AddressV: d3d11.TEXTURE_ADDRESS_CLAMP, + AddressW: d3d11.TEXTURE_ADDRESS_CLAMP, + MaxAnisotropy: 1, + MinLOD: -math.MaxFloat32, + MaxLOD: math.MaxFloat32, + }) + if err != nil { + d3d11.IUnknownRelease(unsafe.Pointer(tex), tex.Vtbl.Release) + return nil, err + } + resView, err = b.dev.CreateShaderResourceView( + (*d3d11.Resource)(unsafe.Pointer(tex)), + unsafe.Pointer(&d3d11.SHADER_RESOURCE_VIEW_DESC_TEX2D{ + SHADER_RESOURCE_VIEW_DESC: d3d11.SHADER_RESOURCE_VIEW_DESC{ + Format: d3dfmt, + ViewDimension: d3d11.SRV_DIMENSION_TEXTURE2D, + }, + Texture2D: d3d11.TEX2D_SRV{ + MostDetailedMip: 0, + MipLevels: ^uint32(0), + }, + }), + ) + if err != nil { + d3d11.IUnknownRelease(unsafe.Pointer(tex), tex.Vtbl.Release) + d3d11.IUnknownRelease(unsafe.Pointer(sampler), sampler.Vtbl.Release) + return nil, err + } + } + if bindings&driver.BufferBindingShaderStorageWrite != 0 { + uaView, err = b.dev.CreateUnorderedAccessView( + (*d3d11.Resource)(unsafe.Pointer(tex)), + unsafe.Pointer(&d3d11.UNORDERED_ACCESS_VIEW_DESC_TEX2D{ + UNORDERED_ACCESS_VIEW_DESC: d3d11.UNORDERED_ACCESS_VIEW_DESC{ + Format: d3dfmt, + ViewDimension: d3d11.UAV_DIMENSION_TEXTURE2D, + }, + Texture2D: d3d11.TEX2D_UAV{ + MipSlice: 0, + }, + }), + ) + if err != nil { + if sampler != nil { + d3d11.IUnknownRelease(unsafe.Pointer(sampler), sampler.Vtbl.Release) + } + if resView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(resView), resView.Vtbl.Release) + } + d3d11.IUnknownRelease(unsafe.Pointer(tex), tex.Vtbl.Release) + return nil, err + } + } + if bindings&driver.BufferBindingFramebuffer != 0 { + resource := (*d3d11.Resource)(unsafe.Pointer(tex)) + fbo, err = b.dev.CreateRenderTargetView(resource) + if err != nil { + if uaView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(uaView), uaView.Vtbl.Release) + } + if sampler != nil { + 
d3d11.IUnknownRelease(unsafe.Pointer(sampler), sampler.Vtbl.Release) + } + if resView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(resView), resView.Vtbl.Release) + } + d3d11.IUnknownRelease(unsafe.Pointer(tex), tex.Vtbl.Release) + return nil, err + } + } + return &Texture{backend: b, format: d3dfmt, tex: tex, sampler: sampler, resView: resView, uaView: uaView, renderTarget: fbo, bindings: bindings, width: width, height: height}, nil +} + +func (b *Backend) newInputLayout(vertexShader shader.Sources, layout []driver.InputDesc) (*d3d11.InputLayout, error) { + if len(vertexShader.Inputs) != len(layout) { + return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vertexShader.Inputs)) + } + descs := make([]d3d11.INPUT_ELEMENT_DESC, len(layout)) + for i, l := range layout { + inp := vertexShader.Inputs[i] + cname, err := windows.BytePtrFromString(inp.Semantic) + if err != nil { + return nil, err + } + var format uint32 + switch l.Type { + case shader.DataTypeFloat: + switch l.Size { + case 1: + format = d3d11.DXGI_FORMAT_R32_FLOAT + case 2: + format = d3d11.DXGI_FORMAT_R32G32_FLOAT + case 3: + format = d3d11.DXGI_FORMAT_R32G32B32_FLOAT + case 4: + format = d3d11.DXGI_FORMAT_R32G32B32A32_FLOAT + default: + panic("unsupported data size") + } + case shader.DataTypeShort: + switch l.Size { + case 1: + format = d3d11.DXGI_FORMAT_R16_SINT + case 2: + format = d3d11.DXGI_FORMAT_R16G16_SINT + default: + panic("unsupported data size") + } + default: + panic("unsupported data type") + } + descs[i] = d3d11.INPUT_ELEMENT_DESC{ + SemanticName: cname, + SemanticIndex: uint32(inp.SemanticIndex), + Format: format, + AlignedByteOffset: uint32(l.Offset), + } + } + return b.dev.CreateInputLayout(descs, []byte(vertexShader.DXBC)) +} + +func (b *Backend) NewBuffer(typ driver.BufferBinding, size int) (driver.Buffer, error) { + return b.newBuffer(typ, size, nil, false) +} + +func (b *Backend) NewImmutableBuffer(typ driver.BufferBinding, data []byte) 
(driver.Buffer, error) { + return b.newBuffer(typ, len(data), data, true) +} + +func (b *Backend) newBuffer(typ driver.BufferBinding, size int, data []byte, immutable bool) (*Buffer, error) { + if typ&driver.BufferBindingUniforms != 0 { + if typ != driver.BufferBindingUniforms { + return nil, errors.New("uniform buffers cannot have other bindings") + } + if size%16 != 0 { + return nil, fmt.Errorf("constant buffer size is %d, expected a multiple of 16", size) + } + } + bind := convBufferBinding(typ) + var usage, miscFlags, cpuFlags uint32 + if immutable { + usage = d3d11.USAGE_IMMUTABLE + } + if typ&driver.BufferBindingShaderStorageWrite != 0 { + cpuFlags = d3d11.CPU_ACCESS_READ + } + if typ&(driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite) != 0 { + miscFlags |= d3d11.RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS + } + buf, err := b.dev.CreateBuffer(&d3d11.BUFFER_DESC{ + ByteWidth: uint32(size), + Usage: usage, + BindFlags: bind, + CPUAccessFlags: cpuFlags, + MiscFlags: miscFlags, + }, data) + if err != nil { + return nil, err + } + var ( + resView *d3d11.ShaderResourceView + uaView *d3d11.UnorderedAccessView + ) + if typ&driver.BufferBindingShaderStorageWrite != 0 { + uaView, err = b.dev.CreateUnorderedAccessView( + (*d3d11.Resource)(unsafe.Pointer(buf)), + unsafe.Pointer(&d3d11.UNORDERED_ACCESS_VIEW_DESC_BUFFER{ + UNORDERED_ACCESS_VIEW_DESC: d3d11.UNORDERED_ACCESS_VIEW_DESC{ + Format: d3d11.DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d11.UAV_DIMENSION_BUFFER, + }, + Buffer: d3d11.BUFFER_UAV{ + FirstElement: 0, + NumElements: uint32(size / 4), + Flags: d3d11.BUFFER_UAV_FLAG_RAW, + }, + }), + ) + if err != nil { + d3d11.IUnknownRelease(unsafe.Pointer(buf), buf.Vtbl.Release) + return nil, err + } + } else if typ&driver.BufferBindingShaderStorageRead != 0 { + resView, err = b.dev.CreateShaderResourceView( + (*d3d11.Resource)(unsafe.Pointer(buf)), + unsafe.Pointer(&d3d11.SHADER_RESOURCE_VIEW_DESC_BUFFEREX{ + SHADER_RESOURCE_VIEW_DESC: 
d3d11.SHADER_RESOURCE_VIEW_DESC{ + Format: d3d11.DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d11.SRV_DIMENSION_BUFFEREX, + }, + Buffer: d3d11.BUFFEREX_SRV{ + FirstElement: 0, + NumElements: uint32(size / 4), + Flags: d3d11.BUFFEREX_SRV_FLAG_RAW, + }, + }), + ) + if err != nil { + d3d11.IUnknownRelease(unsafe.Pointer(buf), buf.Vtbl.Release) + return nil, err + } + } + return &Buffer{backend: b, buf: buf, bind: bind, size: size, resView: resView, uaView: uaView, immutable: immutable}, nil +} + +func (b *Backend) NewComputeProgram(shader shader.Sources) (driver.Program, error) { + cs, err := b.dev.CreateComputeShader([]byte(shader.DXBC)) + if err != nil { + return nil, err + } + return &Program{backend: b, shader: cs}, nil +} + +func (b *Backend) NewPipeline(desc driver.PipelineDesc) (driver.Pipeline, error) { + vsh := desc.VertexShader.(*VertexShader) + fsh := desc.FragmentShader.(*FragmentShader) + blend, err := b.newBlendState(desc.BlendDesc) + if err != nil { + return nil, err + } + var layout *d3d11.InputLayout + if l := desc.VertexLayout; l.Stride > 0 { + var err error + layout, err = b.newInputLayout(vsh.src, l.Inputs) + if err != nil { + d3d11.IUnknownRelease(unsafe.Pointer(blend), blend.Vtbl.AddRef) + return nil, err + } + } + + // Retain shaders. 
+ vshRef := vsh.shader + fshRef := fsh.shader + d3d11.IUnknownAddRef(unsafe.Pointer(vshRef), vshRef.Vtbl.AddRef) + d3d11.IUnknownAddRef(unsafe.Pointer(fshRef), fshRef.Vtbl.AddRef) + + return &Pipeline{ + vert: vshRef, + frag: fshRef, + layout: layout, + stride: desc.VertexLayout.Stride, + blend: blend, + topology: desc.Topology, + }, nil +} + +func (b *Backend) newBlendState(desc driver.BlendDesc) (*d3d11.BlendState, error) { + var d3ddesc d3d11.BLEND_DESC + t0 := &d3ddesc.RenderTarget[0] + t0.RenderTargetWriteMask = d3d11.COLOR_WRITE_ENABLE_ALL + t0.BlendOp = d3d11.BLEND_OP_ADD + t0.BlendOpAlpha = d3d11.BLEND_OP_ADD + if desc.Enable { + t0.BlendEnable = 1 + } + scol, salpha := toBlendFactor(desc.SrcFactor) + dcol, dalpha := toBlendFactor(desc.DstFactor) + t0.SrcBlend = scol + t0.SrcBlendAlpha = salpha + t0.DestBlend = dcol + t0.DestBlendAlpha = dalpha + return b.dev.CreateBlendState(&d3ddesc) +} + +func (b *Backend) NewVertexShader(src shader.Sources) (driver.VertexShader, error) { + vs, err := b.dev.CreateVertexShader([]byte(src.DXBC)) + if err != nil { + return nil, err + } + return &VertexShader{b, vs, src}, nil +} + +func (b *Backend) NewFragmentShader(src shader.Sources) (driver.FragmentShader, error) { + fs, err := b.dev.CreatePixelShader([]byte(src.DXBC)) + if err != nil { + return nil, err + } + return &FragmentShader{b, fs}, nil +} + +func (b *Backend) Viewport(x, y, width, height int) { + b.viewport = d3d11.VIEWPORT{ + TopLeftX: float32(x), + TopLeftY: float32(y), + Width: float32(width), + Height: float32(height), + MinDepth: 0.0, + MaxDepth: 1.0, + } + b.ctx.RSSetViewports(&b.viewport) +} + +func (b *Backend) DrawArrays(off, count int) { + b.prepareDraw() + b.ctx.Draw(uint32(count), uint32(off)) +} + +func (b *Backend) DrawElements(off, count int) { + b.prepareDraw() + b.ctx.DrawIndexed(uint32(count), uint32(off), 0) +} + +func (b *Backend) prepareDraw() { + p := b.pipeline + if p == nil { + return + } + b.ctx.VSSetShader(p.vert) + 
b.ctx.PSSetShader(p.frag) + b.ctx.IASetInputLayout(p.layout) + b.ctx.OMSetBlendState(p.blend, nil, 0xffffffff) + if b.vert.buffer != nil { + b.ctx.IASetVertexBuffers(b.vert.buffer.buf, uint32(p.stride), uint32(b.vert.offset)) + } + var topology uint32 + switch p.topology { + case driver.TopologyTriangles: + topology = d3d11.PRIMITIVE_TOPOLOGY_TRIANGLELIST + case driver.TopologyTriangleStrip: + topology = d3d11.PRIMITIVE_TOPOLOGY_TRIANGLESTRIP + default: + panic("unsupported draw mode") + } + b.ctx.IASetPrimitiveTopology(topology) +} + +func (b *Backend) BindImageTexture(unit int, tex driver.Texture) { + t := tex.(*Texture) + if t.uaView != nil { + b.ctx.CSSetUnorderedAccessViews(uint32(unit), t.uaView) + } else { + b.ctx.CSSetShaderResources(uint32(unit), t.resView) + } +} + +func (b *Backend) DispatchCompute(x, y, z int) { + b.ctx.CSSetShader(b.program.shader) + b.ctx.Dispatch(uint32(x), uint32(y), uint32(z)) +} + +func (t *Texture) Upload(offset, size image.Point, pixels []byte, stride int) { + if stride == 0 { + stride = size.X * 4 + } + dst := &d3d11.BOX{ + Left: uint32(offset.X), + Top: uint32(offset.Y), + Right: uint32(offset.X + size.X), + Bottom: uint32(offset.Y + size.Y), + Front: 0, + Back: 1, + } + res := (*d3d11.Resource)(unsafe.Pointer(t.tex)) + t.backend.ctx.UpdateSubresource(res, dst, uint32(stride), uint32(len(pixels)), pixels) +} + +func (t *Texture) Release() { + if t.foreign { + panic("texture not created by NewTexture") + } + if t.renderTarget != nil { + d3d11.IUnknownRelease(unsafe.Pointer(t.renderTarget), t.renderTarget.Vtbl.Release) + } + if t.sampler != nil { + d3d11.IUnknownRelease(unsafe.Pointer(t.sampler), t.sampler.Vtbl.Release) + } + if t.resView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(t.resView), t.resView.Vtbl.Release) + } + if t.uaView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(t.uaView), t.uaView.Vtbl.Release) + } + d3d11.IUnknownRelease(unsafe.Pointer(t.tex), t.tex.Vtbl.Release) + *t = Texture{} +} + +func (b *Backend) 
PrepareTexture(tex driver.Texture) {} + +func (b *Backend) BindTexture(unit int, tex driver.Texture) { + t := tex.(*Texture) + b.ctx.PSSetSamplers(uint32(unit), t.sampler) + b.ctx.PSSetShaderResources(uint32(unit), t.resView) +} + +func (b *Backend) BindPipeline(pipe driver.Pipeline) { + b.pipeline = pipe.(*Pipeline) +} + +func (b *Backend) BindProgram(prog driver.Program) { + b.program = prog.(*Program) +} + +func (s *VertexShader) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(s.shader), s.shader.Vtbl.Release) + *s = VertexShader{} +} + +func (s *FragmentShader) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(s.shader), s.shader.Vtbl.Release) + *s = FragmentShader{} +} + +func (s *Program) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(s.shader), s.shader.Vtbl.Release) + *s = Program{} +} + +func (p *Pipeline) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(p.vert), p.vert.Vtbl.Release) + d3d11.IUnknownRelease(unsafe.Pointer(p.frag), p.frag.Vtbl.Release) + d3d11.IUnknownRelease(unsafe.Pointer(p.blend), p.blend.Vtbl.Release) + if l := p.layout; l != nil { + d3d11.IUnknownRelease(unsafe.Pointer(l), l.Vtbl.Release) + } + *p = Pipeline{} +} + +func (b *Backend) BindStorageBuffer(binding int, buffer driver.Buffer) { + buf := buffer.(*Buffer) + if buf.resView != nil { + b.ctx.CSSetShaderResources(uint32(binding), buf.resView) + } else { + b.ctx.CSSetUnorderedAccessViews(uint32(binding), buf.uaView) + } +} + +func (b *Backend) BindUniforms(buffer driver.Buffer) { + buf := buffer.(*Buffer) + b.ctx.VSSetConstantBuffers(buf.buf) + b.ctx.PSSetConstantBuffers(buf.buf) +} + +func (b *Backend) BindVertexBuffer(buf driver.Buffer, offset int) { + b.vert.buffer = buf.(*Buffer) + b.vert.offset = offset +} + +func (b *Backend) BindIndexBuffer(buf driver.Buffer) { + b.ctx.IASetIndexBuffer(buf.(*Buffer).buf, d3d11.DXGI_FORMAT_R16_UINT, 0) +} + +func (b *Buffer) Download(dst []byte) error { + res := (*d3d11.Resource)(unsafe.Pointer(b.buf)) + resMap, err := 
b.backend.ctx.Map(res, 0, d3d11.MAP_READ, 0) + if err != nil { + return fmt.Errorf("d3d11: %v", err) + } + defer b.backend.ctx.Unmap(res, 0) + data := sliceOf(resMap.PData, len(dst)) + copy(dst, data) + return nil +} + +func (b *Buffer) Upload(data []byte) { + var dst *d3d11.BOX + if len(data) < b.size { + dst = &d3d11.BOX{ + Left: 0, + Right: uint32(len(data)), + Top: 0, + Bottom: 1, + Front: 0, + Back: 1, + } + } + b.backend.ctx.UpdateSubresource((*d3d11.Resource)(unsafe.Pointer(b.buf)), dst, 0, 0, data) +} + +func (b *Buffer) Release() { + if b.resView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(b.resView), b.resView.Vtbl.Release) + } + if b.uaView != nil { + d3d11.IUnknownRelease(unsafe.Pointer(b.uaView), b.uaView.Vtbl.Release) + } + d3d11.IUnknownRelease(unsafe.Pointer(b.buf), b.buf.Vtbl.Release) + *b = Buffer{} +} + +func (t *Texture) ReadPixels(src image.Rectangle, pixels []byte, stride int) error { + w, h := src.Dx(), src.Dy() + tex, err := t.backend.dev.CreateTexture2D(&d3d11.TEXTURE2D_DESC{ + Width: uint32(w), + Height: uint32(h), + MipLevels: 1, + ArraySize: 1, + Format: t.format, + SampleDesc: d3d11.DXGI_SAMPLE_DESC{ + Count: 1, + Quality: 0, + }, + Usage: d3d11.USAGE_STAGING, + CPUAccessFlags: d3d11.CPU_ACCESS_READ, + }) + if err != nil { + return fmt.Errorf("ReadPixels: %v", err) + } + defer d3d11.IUnknownRelease(unsafe.Pointer(tex), tex.Vtbl.Release) + res := (*d3d11.Resource)(unsafe.Pointer(tex)) + t.backend.ctx.CopySubresourceRegion( + res, + 0, // Destination subresource. + 0, 0, 0, // Destination coordinates (x, y, z). + (*d3d11.Resource)(t.tex), + 0, // Source subresource. 
+ &d3d11.BOX{ + Left: uint32(src.Min.X), + Top: uint32(src.Min.Y), + Right: uint32(src.Max.X), + Bottom: uint32(src.Max.Y), + Front: 0, + Back: 1, + }, + ) + resMap, err := t.backend.ctx.Map(res, 0, d3d11.MAP_READ, 0) + if err != nil { + return fmt.Errorf("ReadPixels: %v", err) + } + defer t.backend.ctx.Unmap(res, 0) + srcPitch := stride + dstPitch := int(resMap.RowPitch) + mapSize := dstPitch * h + data := sliceOf(resMap.PData, mapSize) + width := w * 4 + for r := 0; r < h; r++ { + pixels := pixels[r*srcPitch:] + copy(pixels[:width], data[r*dstPitch:]) + } + return nil +} + +func (b *Backend) BeginCompute() { +} + +func (b *Backend) EndCompute() { +} + +func (b *Backend) BeginRenderPass(tex driver.Texture, d driver.LoadDesc) { + t := tex.(*Texture) + b.ctx.OMSetRenderTargets(t.renderTarget, nil) + if d.Action == driver.LoadActionClear { + c := d.ClearColor + b.clearColor = [4]float32{c.R, c.G, c.B, c.A} + b.ctx.ClearRenderTargetView(t.renderTarget, &b.clearColor) + } +} + +func (b *Backend) EndRenderPass() { +} + +func (f *Texture) ImplementsRenderTarget() {} + +func convBufferBinding(typ driver.BufferBinding) uint32 { + var bindings uint32 + if typ&driver.BufferBindingVertices != 0 { + bindings |= d3d11.BIND_VERTEX_BUFFER + } + if typ&driver.BufferBindingIndices != 0 { + bindings |= d3d11.BIND_INDEX_BUFFER + } + if typ&driver.BufferBindingUniforms != 0 { + bindings |= d3d11.BIND_CONSTANT_BUFFER + } + if typ&driver.BufferBindingTexture != 0 { + bindings |= d3d11.BIND_SHADER_RESOURCE + } + if typ&driver.BufferBindingFramebuffer != 0 { + bindings |= d3d11.BIND_RENDER_TARGET + } + if typ&driver.BufferBindingShaderStorageWrite != 0 { + bindings |= d3d11.BIND_UNORDERED_ACCESS + } else if typ&driver.BufferBindingShaderStorageRead != 0 { + bindings |= d3d11.BIND_SHADER_RESOURCE + } + return bindings +} + +func toBlendFactor(f driver.BlendFactor) (uint32, uint32) { + switch f { + case driver.BlendFactorOne: + return d3d11.BLEND_ONE, d3d11.BLEND_ONE + case 
driver.BlendFactorOneMinusSrcAlpha: + return d3d11.BLEND_INV_SRC_ALPHA, d3d11.BLEND_INV_SRC_ALPHA + case driver.BlendFactorZero: + return d3d11.BLEND_ZERO, d3d11.BLEND_ZERO + case driver.BlendFactorDstColor: + return d3d11.BLEND_DEST_COLOR, d3d11.BLEND_DEST_ALPHA + default: + panic("unsupported blend source factor") + } +} + +// sliceOf returns a slice from a (native) pointer. +func sliceOf(ptr uintptr, cap int) []byte { + var data []byte + h := (*reflect.SliceHeader)(unsafe.Pointer(&data)) + h.Data = ptr + h.Cap = cap + h.Len = cap + return data +} diff --git a/vendor/gioui.org/gpu/internal/driver/api.go b/vendor/gioui.org/gpu/internal/driver/api.go new file mode 100644 index 0000000..9a762a6 --- /dev/null +++ b/vendor/gioui.org/gpu/internal/driver/api.go @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +package driver + +import ( + "fmt" + "unsafe" + + "gioui.org/internal/gl" +) + +// See gpu/api.go for documentation for the API types. + +type API interface { + implementsAPI() +} + +type RenderTarget interface { + ImplementsRenderTarget() +} + +type OpenGLRenderTarget gl.Framebuffer + +type Direct3D11RenderTarget struct { + // RenderTarget is a *ID3D11RenderTargetView. + RenderTarget unsafe.Pointer +} + +type MetalRenderTarget struct { + // Texture is a MTLTexture. + Texture uintptr +} + +type VulkanRenderTarget struct { + // WaitSem is a VkSemaphore that must signaled before accessing Framebuffer. + WaitSem uint64 + // SignalSem is a VkSemaphore that signal access to Framebuffer is complete. + SignalSem uint64 + // Image is the VkImage to render into. + Image uint64 + // Framebuffer is a VkFramebuffer for Image. + Framebuffer uint64 +} + +type OpenGL struct { + // ES forces the use of ANGLE OpenGL ES libraries on macOS. It is + // ignored on all other platforms. + ES bool + // Context contains the WebGL context for WebAssembly platforms. It is + // empty for all other platforms; an OpenGL context is assumed current when + // calling NewDevice. 
+ Context gl.Context + // Shared instructs users of the context to restore the GL state after + // use. + Shared bool +} + +type Direct3D11 struct { + // Device contains a *ID3D11Device. + Device unsafe.Pointer +} + +type Metal struct { + // Device is an MTLDevice. + Device uintptr + // Queue is a MTLCommandQueue. + Queue uintptr + // PixelFormat is the MTLPixelFormat of the default framebuffer. + PixelFormat int +} + +type Vulkan struct { + // PhysDevice is a VkPhysicalDevice. + PhysDevice unsafe.Pointer + // Device is a VkDevice. + Device unsafe.Pointer + // QueueFamily is the queue familily index of the queue. + QueueFamily int + // QueueIndex is the logical queue index of the queue. + QueueIndex int + // Format is a VkFormat that matches render targets. + Format int +} + +// API specific device constructors. +var ( + NewOpenGLDevice func(api OpenGL) (Device, error) + NewDirect3D11Device func(api Direct3D11) (Device, error) + NewMetalDevice func(api Metal) (Device, error) + NewVulkanDevice func(api Vulkan) (Device, error) +) + +// NewDevice creates a new Device given the api. +// +// Note that the device does not assume ownership of the resources contained in +// api; the caller must ensure the resources are valid until the device is +// released. 
+func NewDevice(api API) (Device, error) { + switch api := api.(type) { + case OpenGL: + if NewOpenGLDevice != nil { + return NewOpenGLDevice(api) + } + case Direct3D11: + if NewDirect3D11Device != nil { + return NewDirect3D11Device(api) + } + case Metal: + if NewMetalDevice != nil { + return NewMetalDevice(api) + } + case Vulkan: + if NewVulkanDevice != nil { + return NewVulkanDevice(api) + } + } + return nil, fmt.Errorf("driver: no driver available for the API %T", api) +} + +func (OpenGL) implementsAPI() {} +func (Direct3D11) implementsAPI() {} +func (Metal) implementsAPI() {} +func (Vulkan) implementsAPI() {} +func (OpenGLRenderTarget) ImplementsRenderTarget() {} +func (Direct3D11RenderTarget) ImplementsRenderTarget() {} +func (MetalRenderTarget) ImplementsRenderTarget() {} +func (VulkanRenderTarget) ImplementsRenderTarget() {} diff --git a/vendor/gioui.org/gpu/internal/driver/driver.go b/vendor/gioui.org/gpu/internal/driver/driver.go new file mode 100644 index 0000000..58cb89b --- /dev/null +++ b/vendor/gioui.org/gpu/internal/driver/driver.go @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +package driver + +import ( + "errors" + "image" + "time" + + "gioui.org/internal/f32color" + "gioui.org/shader" +) + +// Device represents the abstraction of underlying GPU +// APIs such as OpenGL, Direct3D useful for rendering Gio +// operations. +type Device interface { + BeginFrame(target RenderTarget, clear bool, viewport image.Point) Texture + EndFrame() + Caps() Caps + NewTimer() Timer + // IsContinuousTime reports whether all timer measurements + // are valid at the point of call. 
+ IsTimeContinuous() bool + NewTexture(format TextureFormat, width, height int, minFilter, magFilter TextureFilter, bindings BufferBinding) (Texture, error) + NewImmutableBuffer(typ BufferBinding, data []byte) (Buffer, error) + NewBuffer(typ BufferBinding, size int) (Buffer, error) + NewComputeProgram(shader shader.Sources) (Program, error) + NewVertexShader(src shader.Sources) (VertexShader, error) + NewFragmentShader(src shader.Sources) (FragmentShader, error) + NewPipeline(desc PipelineDesc) (Pipeline, error) + + Viewport(x, y, width, height int) + DrawArrays(off, count int) + DrawElements(off, count int) + + BeginRenderPass(t Texture, desc LoadDesc) + EndRenderPass() + PrepareTexture(t Texture) + BindProgram(p Program) + BindPipeline(p Pipeline) + BindTexture(unit int, t Texture) + BindVertexBuffer(b Buffer, offset int) + BindIndexBuffer(b Buffer) + BindImageTexture(unit int, texture Texture) + BindUniforms(buf Buffer) + BindStorageBuffer(binding int, buf Buffer) + + BeginCompute() + EndCompute() + CopyTexture(dst Texture, dstOrigin image.Point, src Texture, srcRect image.Rectangle) + DispatchCompute(x, y, z int) + + Release() +} + +var ErrDeviceLost = errors.New("GPU device lost") + +type LoadDesc struct { + Action LoadAction + ClearColor f32color.RGBA +} + +type Pipeline interface { + Release() +} + +type PipelineDesc struct { + VertexShader VertexShader + FragmentShader FragmentShader + VertexLayout VertexLayout + BlendDesc BlendDesc + PixelFormat TextureFormat + Topology Topology +} + +type VertexLayout struct { + Inputs []InputDesc + Stride int +} + +// InputDesc describes a vertex attribute as laid out in a Buffer. 
+type InputDesc struct { + Type shader.DataType + Size int + + Offset int +} + +type BlendDesc struct { + Enable bool + SrcFactor, DstFactor BlendFactor +} + +type BlendFactor uint8 + +type Topology uint8 + +type TextureFilter uint8 +type TextureFormat uint8 + +type BufferBinding uint8 + +type LoadAction uint8 + +type Features uint + +type Caps struct { + // BottomLeftOrigin is true if the driver has the origin in the lower left + // corner. The OpenGL driver returns true. + BottomLeftOrigin bool + Features Features + MaxTextureSize int +} + +type VertexShader interface { + Release() +} + +type FragmentShader interface { + Release() +} + +type Program interface { + Release() +} + +type Buffer interface { + Release() + Upload(data []byte) + Download(data []byte) error +} + +type Timer interface { + Begin() + End() + Duration() (time.Duration, bool) + Release() +} + +type Texture interface { + RenderTarget + Upload(offset, size image.Point, pixels []byte, stride int) + ReadPixels(src image.Rectangle, pixels []byte, stride int) error + Release() +} + +const ( + BufferBindingIndices BufferBinding = 1 << iota + BufferBindingVertices + BufferBindingUniforms + BufferBindingTexture + BufferBindingFramebuffer + BufferBindingShaderStorageRead + BufferBindingShaderStorageWrite +) + +const ( + TextureFormatSRGBA TextureFormat = iota + TextureFormatFloat + TextureFormatRGBA8 + // TextureFormatOutput denotes the format used by the output framebuffer. 
+ TextureFormatOutput +) + +const ( + FilterNearest TextureFilter = iota + FilterLinear +) + +const ( + FeatureTimers Features = 1 << iota + FeatureFloatRenderTargets + FeatureCompute + FeatureSRGB +) + +const ( + TopologyTriangleStrip Topology = iota + TopologyTriangles +) + +const ( + BlendFactorOne BlendFactor = iota + BlendFactorOneMinusSrcAlpha + BlendFactorZero + BlendFactorDstColor +) + +const ( + LoadActionKeep LoadAction = iota + LoadActionClear + LoadActionInvalidate +) + +var ErrContentLost = errors.New("buffer content lost") + +func (f Features) Has(feats Features) bool { + return f&feats == feats +} + +func DownloadImage(d Device, t Texture, img *image.RGBA) error { + r := img.Bounds() + if err := t.ReadPixels(r, img.Pix, img.Stride); err != nil { + return err + } + if d.Caps().BottomLeftOrigin { + // OpenGL origin is in the lower-left corner. Flip the image to + // match. + flipImageY(r.Dx()*4, r.Dy(), img.Pix) + } + return nil +} + +func flipImageY(stride, height int, pixels []byte) { + // Flip image in y-direction. OpenGL's origin is in the lower + // left corner. + row := make([]uint8, stride) + for y := 0; y < height/2; y++ { + y1 := height - y - 1 + dest := y1 * stride + src := y * stride + copy(row, pixels[dest:]) + copy(pixels[dest:], pixels[src:src+len(row)]) + copy(pixels[src:], row) + } +} + +func UploadImage(t Texture, offset image.Point, img *image.RGBA) { + var pixels []byte + size := img.Bounds().Size() + min := img.Rect.Min + start := img.PixOffset(min.X, min.Y) + end := img.PixOffset(min.X+size.X, min.Y+size.Y-1) + pixels = img.Pix[start:end] + t.Upload(offset, size, pixels, img.Stride) +} diff --git a/vendor/gioui.org/gpu/internal/metal/metal.go b/vendor/gioui.org/gpu/internal/metal/metal.go new file mode 100644 index 0000000..b9739af --- /dev/null +++ b/vendor/gioui.org/gpu/internal/metal/metal.go @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +// This file exists so this package builds on non-Darwin platforms. 
+ +package metal diff --git a/vendor/gioui.org/gpu/internal/metal/metal_darwin.go b/vendor/gioui.org/gpu/internal/metal/metal_darwin.go new file mode 100644 index 0000000..c180731 --- /dev/null +++ b/vendor/gioui.org/gpu/internal/metal/metal_darwin.go @@ -0,0 +1,1141 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +package metal + +import ( + "errors" + "fmt" + "image" + "unsafe" + + "gioui.org/gpu/internal/driver" + "gioui.org/shader" +) + +/* +#cgo CFLAGS: -Werror -xobjective-c -fmodules -fobjc-arc +#cgo LDFLAGS: -framework CoreGraphics + +@import Metal; + +#include <CoreFoundation/CoreFoundation.h> +#include <Metal/Metal.h> + +typedef struct { + void *addr; + NSUInteger size; +} slice; + +static CFTypeRef queueNewBuffer(CFTypeRef queueRef) { + @autoreleasepool { + id<MTLCommandQueue> queue = (__bridge id<MTLCommandQueue>)queueRef; + return CFBridgingRetain([queue commandBuffer]); + } +} + +static void cmdBufferCommit(CFTypeRef cmdBufRef) { + @autoreleasepool { + id<MTLCommandBuffer> cmdBuf = (__bridge id<MTLCommandBuffer>)cmdBufRef; + [cmdBuf commit]; + } +} + +static void cmdBufferWaitUntilCompleted(CFTypeRef cmdBufRef) { + @autoreleasepool { + id<MTLCommandBuffer> cmdBuf = (__bridge id<MTLCommandBuffer>)cmdBufRef; + [cmdBuf waitUntilCompleted]; + } +} + +static CFTypeRef cmdBufferRenderEncoder(CFTypeRef cmdBufRef, CFTypeRef textureRef, MTLLoadAction act, float r, float g, float b, float a) { + @autoreleasepool { + id<MTLCommandBuffer> cmdBuf = (__bridge id<MTLCommandBuffer>)cmdBufRef; + MTLRenderPassDescriptor *desc = [MTLRenderPassDescriptor new]; + desc.colorAttachments[0].texture = (__bridge id<MTLTexture>)textureRef; + desc.colorAttachments[0].loadAction = act; + desc.colorAttachments[0].clearColor = MTLClearColorMake(r, g, b, a); + return CFBridgingRetain([cmdBuf renderCommandEncoderWithDescriptor:desc]); + } +} + +static CFTypeRef cmdBufferComputeEncoder(CFTypeRef cmdBufRef) { + @autoreleasepool { + id<MTLCommandBuffer> cmdBuf = (__bridge 
id<MTLCommandBuffer>)cmdBufRef; + return CFBridgingRetain([cmdBuf computeCommandEncoder]); + } +} + +static CFTypeRef cmdBufferBlitEncoder(CFTypeRef cmdBufRef) { + @autoreleasepool { + id<MTLCommandBuffer> cmdBuf = (__bridge id<MTLCommandBuffer>)cmdBufRef; + return CFBridgingRetain([cmdBuf blitCommandEncoder]); + } +} + +static void renderEncEnd(CFTypeRef renderEncRef) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + [enc endEncoding]; + } +} + +static void renderEncViewport(CFTypeRef renderEncRef, MTLViewport viewport) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + [enc setViewport:viewport]; + } +} + +static void renderEncSetFragmentTexture(CFTypeRef renderEncRef, NSUInteger index, CFTypeRef texRef) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + id<MTLTexture> tex = (__bridge id<MTLTexture>)texRef; + [enc setFragmentTexture:tex atIndex:index]; + } +} + +static void renderEncSetFragmentSamplerState(CFTypeRef renderEncRef, NSUInteger index, CFTypeRef samplerRef) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + id<MTLSamplerState> sampler = (__bridge id<MTLSamplerState>)samplerRef; + [enc setFragmentSamplerState:sampler atIndex:index]; + } +} + +static void renderEncSetVertexBuffer(CFTypeRef renderEncRef, CFTypeRef bufRef, NSUInteger idx, NSUInteger offset) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + id<MTLBuffer> buf = (__bridge id<MTLBuffer>)bufRef; + [enc setVertexBuffer:buf offset:offset atIndex:idx]; + } +} + +static void renderEncSetFragmentBuffer(CFTypeRef renderEncRef, CFTypeRef bufRef, NSUInteger idx, NSUInteger offset) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + 
id<MTLBuffer> buf = (__bridge id<MTLBuffer>)bufRef; + [enc setFragmentBuffer:buf offset:offset atIndex:idx]; + } +} + +static void renderEncSetFragmentBytes(CFTypeRef renderEncRef, const void *bytes, NSUInteger length, NSUInteger idx) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + [enc setFragmentBytes:bytes length:length atIndex:idx]; + } +} + +static void renderEncSetVertexBytes(CFTypeRef renderEncRef, const void *bytes, NSUInteger length, NSUInteger idx) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + [enc setVertexBytes:bytes length:length atIndex:idx]; + } +} + +static void renderEncSetRenderPipelineState(CFTypeRef renderEncRef, CFTypeRef pipeRef) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + id<MTLRenderPipelineState> pipe = (__bridge id<MTLRenderPipelineState>)pipeRef; + [enc setRenderPipelineState:pipe]; + } +} + +static void renderEncDrawPrimitives(CFTypeRef renderEncRef, MTLPrimitiveType type, NSUInteger start, NSUInteger count) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + [enc drawPrimitives:type vertexStart:start vertexCount:count]; + } +} + +static void renderEncDrawIndexedPrimitives(CFTypeRef renderEncRef, MTLPrimitiveType type, CFTypeRef bufRef, NSUInteger offset, NSUInteger count) { + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (__bridge id<MTLRenderCommandEncoder>)renderEncRef; + id<MTLBuffer> buf = (__bridge id<MTLBuffer>)bufRef; + [enc drawIndexedPrimitives:type indexCount:count indexType:MTLIndexTypeUInt16 indexBuffer:buf indexBufferOffset:offset]; + } +} + +static void computeEncSetPipeline(CFTypeRef computeEncRef, CFTypeRef pipeRef) { + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (__bridge id<MTLComputeCommandEncoder>)computeEncRef; + 
id<MTLComputePipelineState> pipe = (__bridge id<MTLComputePipelineState>)pipeRef; + [enc setComputePipelineState:pipe]; + } +} + +static void computeEncSetTexture(CFTypeRef computeEncRef, NSUInteger index, CFTypeRef texRef) { + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (__bridge id<MTLComputeCommandEncoder>)computeEncRef; + id<MTLTexture> tex = (__bridge id<MTLTexture>)texRef; + [enc setTexture:tex atIndex:index]; + } +} + +static void computeEncEnd(CFTypeRef computeEncRef) { + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (__bridge id<MTLComputeCommandEncoder>)computeEncRef; + [enc endEncoding]; + } +} + +static void computeEncSetBuffer(CFTypeRef computeEncRef, NSUInteger index, CFTypeRef bufRef) { + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (__bridge id<MTLComputeCommandEncoder>)computeEncRef; + id<MTLBuffer> buf = (__bridge id<MTLBuffer>)bufRef; + [enc setBuffer:buf offset:0 atIndex:index]; + } +} + +static void computeEncDispatch(CFTypeRef computeEncRef, MTLSize threadgroupsPerGrid, MTLSize threadsPerThreadgroup) { + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (__bridge id<MTLComputeCommandEncoder>)computeEncRef; + [enc dispatchThreadgroups:threadgroupsPerGrid threadsPerThreadgroup:threadsPerThreadgroup]; + } +} + +static void computeEncSetBytes(CFTypeRef computeEncRef, const void *bytes, NSUInteger length, NSUInteger index) { + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (__bridge id<MTLComputeCommandEncoder>)computeEncRef; + [enc setBytes:bytes length:length atIndex:index]; + } +} + +static void blitEncEnd(CFTypeRef blitEncRef) { + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (__bridge id<MTLBlitCommandEncoder>)blitEncRef; + [enc endEncoding]; + } +} + +static void blitEncCopyFromTexture(CFTypeRef blitEncRef, CFTypeRef srcRef, MTLOrigin srcOrig, MTLSize srcSize, CFTypeRef dstRef, MTLOrigin dstOrig) { + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (__bridge 
id<MTLBlitCommandEncoder>)blitEncRef; + id<MTLTexture> src = (__bridge id<MTLTexture>)srcRef; + id<MTLTexture> dst = (__bridge id<MTLTexture>)dstRef; + [enc copyFromTexture:src + sourceSlice:0 + sourceLevel:0 + sourceOrigin:srcOrig + sourceSize:srcSize + toTexture:dst + destinationSlice:0 + destinationLevel:0 + destinationOrigin:dstOrig]; + } +} + +static void blitEncCopyBufferToTexture(CFTypeRef blitEncRef, CFTypeRef bufRef, CFTypeRef texRef, NSUInteger offset, NSUInteger stride, NSUInteger length, MTLSize dims, MTLOrigin orig) { + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (__bridge id<MTLBlitCommandEncoder>)blitEncRef; + id<MTLBuffer> src = (__bridge id<MTLBuffer>)bufRef; + id<MTLTexture> dst = (__bridge id<MTLTexture>)texRef; + [enc copyFromBuffer:src + sourceOffset:offset + sourceBytesPerRow:stride + sourceBytesPerImage:length + sourceSize:dims + toTexture:dst + destinationSlice:0 + destinationLevel:0 + destinationOrigin:orig]; + } +} + +static void blitEncCopyTextureToBuffer(CFTypeRef blitEncRef, CFTypeRef texRef, CFTypeRef bufRef, NSUInteger offset, NSUInteger stride, NSUInteger length, MTLSize dims, MTLOrigin orig) { + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (__bridge id<MTLBlitCommandEncoder>)blitEncRef; + id<MTLTexture> src = (__bridge id<MTLTexture>)texRef; + id<MTLBuffer> dst = (__bridge id<MTLBuffer>)bufRef; + [enc copyFromTexture:src + sourceSlice:0 + sourceLevel:0 + sourceOrigin:orig + sourceSize:dims + toBuffer:dst + destinationOffset:offset + destinationBytesPerRow:stride + destinationBytesPerImage:length]; + } +} + +static void blitEncCopyBufferToBuffer(CFTypeRef blitEncRef, CFTypeRef srcRef, CFTypeRef dstRef, NSUInteger srcOff, NSUInteger dstOff, NSUInteger size) { + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (__bridge id<MTLBlitCommandEncoder>)blitEncRef; + id<MTLBuffer> src = (__bridge id<MTLBuffer>)srcRef; + id<MTLBuffer> dst = (__bridge id<MTLBuffer>)dstRef; + [enc copyFromBuffer:src + sourceOffset:srcOff + 
toBuffer:dst + destinationOffset:dstOff + size:size]; + } +} + +static CFTypeRef newTexture(CFTypeRef devRef, NSUInteger width, NSUInteger height, MTLPixelFormat format, MTLTextureUsage usage) { + @autoreleasepool { + id<MTLDevice> dev = (__bridge id<MTLDevice>)devRef; + MTLTextureDescriptor *mtlDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: format + width: width + height: height + mipmapped: NO]; + mtlDesc.usage = usage; + mtlDesc.storageMode = MTLStorageModePrivate; + return CFBridgingRetain([dev newTextureWithDescriptor:mtlDesc]); + } +} + +static CFTypeRef newSampler(CFTypeRef devRef, MTLSamplerMinMagFilter minFilter, MTLSamplerMinMagFilter magFilter) { + @autoreleasepool { + id<MTLDevice> dev = (__bridge id<MTLDevice>)devRef; + MTLSamplerDescriptor *desc = [MTLSamplerDescriptor new]; + desc.minFilter = minFilter; + desc.magFilter = magFilter; + return CFBridgingRetain([dev newSamplerStateWithDescriptor:desc]); + } +} + +static CFTypeRef newBuffer(CFTypeRef devRef, NSUInteger size, MTLResourceOptions opts) { + @autoreleasepool { + id<MTLDevice> dev = (__bridge id<MTLDevice>)devRef; + id<MTLBuffer> buf = [dev newBufferWithLength:size + options:opts]; + return CFBridgingRetain(buf); + } +} + +static slice bufferContents(CFTypeRef bufRef) { + @autoreleasepool { + id<MTLBuffer> buf = (__bridge id<MTLBuffer>)bufRef; + slice s = {.addr = [buf contents], .size = [buf length]}; + return s; + } +} + +static CFTypeRef newLibrary(CFTypeRef devRef, char *name, void *mtllib, size_t size) { + @autoreleasepool { + id<MTLDevice> dev = (__bridge id<MTLDevice>)devRef; + dispatch_data_t data = dispatch_data_create(mtllib, size, DISPATCH_TARGET_QUEUE_DEFAULT, DISPATCH_DATA_DESTRUCTOR_DEFAULT); + id<MTLLibrary> lib = [dev newLibraryWithData:data error:nil]; + lib.label = [NSString stringWithUTF8String:name]; + return CFBridgingRetain(lib); + } +} + +static CFTypeRef libraryNewFunction(CFTypeRef libRef, char *funcName) { + @autoreleasepool { + id<MTLLibrary> lib = 
(__bridge id<MTLLibrary>)libRef; + NSString *name = [NSString stringWithUTF8String:funcName]; + return CFBridgingRetain([lib newFunctionWithName:name]); + } +} + +static CFTypeRef newComputePipeline(CFTypeRef devRef, CFTypeRef funcRef) { + @autoreleasepool { + id<MTLDevice> dev = (__bridge id<MTLDevice>)devRef; + id<MTLFunction> func = (__bridge id<MTLFunction>)funcRef; + return CFBridgingRetain([dev newComputePipelineStateWithFunction:func error:nil]); + } +} + +static CFTypeRef newRenderPipeline(CFTypeRef devRef, CFTypeRef vertFunc, CFTypeRef fragFunc, MTLPixelFormat pixelFormat, NSUInteger bufIdx, NSUInteger nverts, MTLVertexFormat *fmts, NSUInteger *offsets, NSUInteger stride, int blend, MTLBlendFactor srcFactor, MTLBlendFactor dstFactor, NSUInteger nvertBufs, NSUInteger nfragBufs) { + @autoreleasepool { + id<MTLDevice> dev = (__bridge id<MTLDevice>)devRef; + id<MTLFunction> vfunc = (__bridge id<MTLFunction>)vertFunc; + id<MTLFunction> ffunc = (__bridge id<MTLFunction>)fragFunc; + MTLVertexDescriptor *vdesc = [MTLVertexDescriptor vertexDescriptor]; + vdesc.layouts[bufIdx].stride = stride; + for (NSUInteger i = 0; i < nverts; i++) { + vdesc.attributes[i].format = fmts[i]; + vdesc.attributes[i].offset = offsets[i]; + vdesc.attributes[i].bufferIndex = bufIdx; + } + MTLRenderPipelineDescriptor *desc = [MTLRenderPipelineDescriptor new]; + desc.vertexFunction = vfunc; + desc.fragmentFunction = ffunc; + desc.vertexDescriptor = vdesc; + for (NSUInteger i = 0; i < nvertBufs; i++) { + if (@available(iOS 11.0, *)) { + desc.vertexBuffers[i].mutability = MTLMutabilityImmutable; + } + } + for (NSUInteger i = 0; i < nfragBufs; i++) { + if (@available(iOS 11.0, *)) { + desc.fragmentBuffers[i].mutability = MTLMutabilityImmutable; + } + } + desc.colorAttachments[0].pixelFormat = pixelFormat; + desc.colorAttachments[0].blendingEnabled = blend ? 
YES : NO; + desc.colorAttachments[0].sourceAlphaBlendFactor = srcFactor; + desc.colorAttachments[0].sourceRGBBlendFactor = srcFactor; + desc.colorAttachments[0].destinationAlphaBlendFactor = dstFactor; + desc.colorAttachments[0].destinationRGBBlendFactor = dstFactor; + return CFBridgingRetain([dev newRenderPipelineStateWithDescriptor:desc + error:nil]); + } +} +*/ +import "C" + +type Backend struct { + dev C.CFTypeRef + queue C.CFTypeRef + pixelFmt C.MTLPixelFormat + + cmdBuffer C.CFTypeRef + lastCmdBuffer C.CFTypeRef + renderEnc C.CFTypeRef + computeEnc C.CFTypeRef + blitEnc C.CFTypeRef + + prog *Program + topology C.MTLPrimitiveType + + stagingBuf C.CFTypeRef + stagingOff int + + indexBuf *Buffer + + // bufSizes is scratch space for filling out the spvBufferSizeConstants + // that spirv-cross generates for emulating buffer.length expressions in + // shaders. + bufSizes []uint32 +} + +type Texture struct { + backend *Backend + texture C.CFTypeRef + sampler C.CFTypeRef + width int + height int + foreign bool +} + +type Shader struct { + function C.CFTypeRef + inputs []shader.InputLocation +} + +type Program struct { + pipeline C.CFTypeRef + groupSize [3]int +} + +type Pipeline struct { + pipeline C.CFTypeRef + topology C.MTLPrimitiveType +} + +type Buffer struct { + backend *Backend + size int + buffer C.CFTypeRef + + // store is the buffer contents For buffers not allocated on the GPU. 
+ store []byte +} + +const ( + uniformBufferIndex = 0 + attributeBufferIndex = 1 + + spvBufferSizeConstantsBinding = 25 +) + +const ( + texUnits = 4 + bufferUnits = 4 +) + +func init() { + driver.NewMetalDevice = newMetalDevice +} + +func newMetalDevice(api driver.Metal) (driver.Device, error) { + dev := C.CFTypeRef(api.Device) + C.CFRetain(dev) + queue := C.CFTypeRef(api.Queue) + C.CFRetain(queue) + b := &Backend{ + dev: dev, + queue: queue, + pixelFmt: C.MTLPixelFormat(api.PixelFormat), + bufSizes: make([]uint32, bufferUnits), + } + return b, nil +} + +func (b *Backend) BeginFrame(target driver.RenderTarget, clear bool, viewport image.Point) driver.Texture { + if b.lastCmdBuffer != 0 { + C.cmdBufferWaitUntilCompleted(b.lastCmdBuffer) + b.stagingOff = 0 + } + if target == nil { + return nil + } + switch t := target.(type) { + case driver.MetalRenderTarget: + texture := C.CFTypeRef(t.Texture) + return &Texture{texture: texture, foreign: true} + case *Texture: + return t + default: + panic(fmt.Sprintf("metal: unsupported render target type: %T", t)) + } +} + +func (b *Backend) startBlit() C.CFTypeRef { + if b.blitEnc != 0 { + return b.blitEnc + } + b.endEncoder() + b.ensureCmdBuffer() + b.blitEnc = C.cmdBufferBlitEncoder(b.cmdBuffer) + if b.blitEnc == 0 { + panic("metal: [MTLCommandBuffer blitCommandEncoder:] failed") + } + return b.blitEnc +} + +func (b *Backend) CopyTexture(dst driver.Texture, dorig image.Point, src driver.Texture, srect image.Rectangle) { + enc := b.startBlit() + dstTex := dst.(*Texture).texture + srcTex := src.(*Texture).texture + ssz := srect.Size() + C.blitEncCopyFromTexture( + enc, + srcTex, + C.MTLOrigin{ + x: C.NSUInteger(srect.Min.X), + y: C.NSUInteger(srect.Min.Y), + }, + C.MTLSize{ + width: C.NSUInteger(ssz.X), + height: C.NSUInteger(ssz.Y), + depth: 1, + }, + dstTex, + C.MTLOrigin{ + x: C.NSUInteger(dorig.X), + y: C.NSUInteger(dorig.Y), + }, + ) +} + +func (b *Backend) EndFrame() { + b.endCmdBuffer(false) +} + +func (b *Backend) 
endCmdBuffer(wait bool) { + b.endEncoder() + if b.cmdBuffer == 0 { + return + } + C.cmdBufferCommit(b.cmdBuffer) + if wait { + C.cmdBufferWaitUntilCompleted(b.cmdBuffer) + } + if b.lastCmdBuffer != 0 { + C.CFRelease(b.lastCmdBuffer) + } + b.lastCmdBuffer = b.cmdBuffer + b.cmdBuffer = 0 +} + +func (b *Backend) Caps() driver.Caps { + return driver.Caps{ + MaxTextureSize: 8192, + Features: driver.FeatureSRGB | driver.FeatureCompute | driver.FeatureFloatRenderTargets, + } +} + +func (b *Backend) NewTimer() driver.Timer { + panic("timers not supported") +} + +func (b *Backend) IsTimeContinuous() bool { + panic("timers not supported") +} + +func (b *Backend) Release() { + if b.cmdBuffer != 0 { + C.CFRelease(b.cmdBuffer) + } + if b.lastCmdBuffer != 0 { + C.CFRelease(b.lastCmdBuffer) + } + if b.stagingBuf != 0 { + C.CFRelease(b.stagingBuf) + } + C.CFRelease(b.queue) + C.CFRelease(b.dev) + *b = Backend{} +} + +func (b *Backend) NewTexture(format driver.TextureFormat, width, height int, minFilter, magFilter driver.TextureFilter, bindings driver.BufferBinding) (driver.Texture, error) { + mformat := pixelFormatFor(format) + var usage C.MTLTextureUsage + if bindings&(driver.BufferBindingTexture|driver.BufferBindingShaderStorageRead) != 0 { + usage |= C.MTLTextureUsageShaderRead + } + if bindings&driver.BufferBindingFramebuffer != 0 { + usage |= C.MTLTextureUsageRenderTarget + } + if bindings&driver.BufferBindingShaderStorageWrite != 0 { + usage |= C.MTLTextureUsageShaderWrite + } + tex := C.newTexture(b.dev, C.NSUInteger(width), C.NSUInteger(height), mformat, usage) + if tex == 0 { + return nil, errors.New("metal: [MTLDevice newTextureWithDescriptor:] failed") + } + min := samplerFilterFor(minFilter) + max := samplerFilterFor(magFilter) + s := C.newSampler(b.dev, min, max) + if s == 0 { + C.CFRelease(tex) + return nil, errors.New("metal: [MTLDevice newSamplerStateWithDescriptor:] failed") + } + return &Texture{backend: b, texture: tex, sampler: s, width: width, height: height}, 
nil +} + +func samplerFilterFor(f driver.TextureFilter) C.MTLSamplerMinMagFilter { + switch f { + case driver.FilterNearest: + return C.MTLSamplerMinMagFilterNearest + case driver.FilterLinear: + return C.MTLSamplerMinMagFilterLinear + default: + panic("invalid texture filter") + } +} + +func (b *Backend) NewPipeline(desc driver.PipelineDesc) (driver.Pipeline, error) { + vsh, fsh := desc.VertexShader.(*Shader), desc.FragmentShader.(*Shader) + layout := desc.VertexLayout.Inputs + if got, exp := len(layout), len(vsh.inputs); got != exp { + return nil, fmt.Errorf("metal: number of input descriptors (%d) doesn't match number of inputs (%d)", got, exp) + } + formats := make([]C.MTLVertexFormat, len(layout)) + offsets := make([]C.NSUInteger, len(layout)) + for i, inp := range layout { + index := vsh.inputs[i].Location + formats[index] = vertFormatFor(vsh.inputs[i]) + offsets[index] = C.NSUInteger(inp.Offset) + } + var ( + fmtPtr *C.MTLVertexFormat + offPtr *C.NSUInteger + ) + if len(layout) > 0 { + fmtPtr = &formats[0] + offPtr = &offsets[0] + } + srcFactor := blendFactorFor(desc.BlendDesc.SrcFactor) + dstFactor := blendFactorFor(desc.BlendDesc.DstFactor) + blend := C.int(0) + if desc.BlendDesc.Enable { + blend = 1 + } + pf := b.pixelFmt + if f := desc.PixelFormat; f != driver.TextureFormatOutput { + pf = pixelFormatFor(f) + } + pipe := C.newRenderPipeline( + b.dev, + vsh.function, + fsh.function, + pf, + attributeBufferIndex, + C.NSUInteger(len(layout)), fmtPtr, offPtr, + C.NSUInteger(desc.VertexLayout.Stride), + blend, srcFactor, dstFactor, + 2, // Number of vertex buffers. + 1, // Number of fragment buffers. 
+ ) + if pipe == 0 { + return nil, errors.New("metal: pipeline construction failed") + } + return &Pipeline{pipeline: pipe, topology: primitiveFor(desc.Topology)}, nil +} + +func dataTypeSize(d shader.DataType) int { + switch d { + case shader.DataTypeFloat: + return 4 + default: + panic("unsupported data type") + } +} + +func blendFactorFor(f driver.BlendFactor) C.MTLBlendFactor { + switch f { + case driver.BlendFactorZero: + return C.MTLBlendFactorZero + case driver.BlendFactorOne: + return C.MTLBlendFactorOne + case driver.BlendFactorOneMinusSrcAlpha: + return C.MTLBlendFactorOneMinusSourceAlpha + case driver.BlendFactorDstColor: + return C.MTLBlendFactorDestinationColor + default: + panic("unsupported blend factor") + } +} + +func vertFormatFor(f shader.InputLocation) C.MTLVertexFormat { + t := f.Type + s := f.Size + switch { + case t == shader.DataTypeFloat && s == 1: + return C.MTLVertexFormatFloat + case t == shader.DataTypeFloat && s == 2: + return C.MTLVertexFormatFloat2 + case t == shader.DataTypeFloat && s == 3: + return C.MTLVertexFormatFloat3 + case t == shader.DataTypeFloat && s == 4: + return C.MTLVertexFormatFloat4 + default: + panic("unsupported data type") + } +} + +func pixelFormatFor(f driver.TextureFormat) C.MTLPixelFormat { + switch f { + case driver.TextureFormatFloat: + return C.MTLPixelFormatR16Float + case driver.TextureFormatRGBA8: + return C.MTLPixelFormatRGBA8Unorm + case driver.TextureFormatSRGBA: + return C.MTLPixelFormatRGBA8Unorm_sRGB + default: + panic("unsupported pixel format") + } +} + +func (b *Backend) NewBuffer(typ driver.BufferBinding, size int) (driver.Buffer, error) { + // Transfer buffer contents in command encoders on every use for + // smaller buffers. The advantage is that buffer re-use during a frame + // won't occur a GPU wait. + // We can't do this for buffers written to by the GPU and read by the client, + // and Metal doesn't require a buffer for indexed draws. 
+ if size <= 4096 && typ&(driver.BufferBindingShaderStorageWrite|driver.BufferBindingIndices) == 0 { + return &Buffer{size: size, store: make([]byte, size)}, nil + } + buf := C.newBuffer(b.dev, C.NSUInteger(size), C.MTLResourceStorageModePrivate) + return &Buffer{backend: b, size: size, buffer: buf}, nil +} + +func (b *Backend) NewImmutableBuffer(typ driver.BufferBinding, data []byte) (driver.Buffer, error) { + buf, err := b.NewBuffer(typ, len(data)) + if err != nil { + return nil, err + } + buf.Upload(data) + return buf, nil +} + +func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) { + sh, err := b.newShader(src) + if err != nil { + return nil, err + } + defer sh.Release() + pipe := C.newComputePipeline(b.dev, sh.function) + if pipe == 0 { + return nil, fmt.Errorf("metal: compute program %q load failed", src.Name) + } + return &Program{pipeline: pipe, groupSize: src.WorkgroupSize}, nil +} + +func (b *Backend) NewVertexShader(src shader.Sources) (driver.VertexShader, error) { + return b.newShader(src) +} + +func (b *Backend) NewFragmentShader(src shader.Sources) (driver.FragmentShader, error) { + return b.newShader(src) +} + +func (b *Backend) newShader(src shader.Sources) (*Shader, error) { + vsrc := []byte(src.MetalLib) + cname := C.CString(src.Name) + defer C.free(unsafe.Pointer(cname)) + vlib := C.newLibrary(b.dev, cname, unsafe.Pointer(&vsrc[0]), C.size_t(len(vsrc))) + if vlib == 0 { + return nil, fmt.Errorf("metal: vertex shader %q load failed", src.Name) + } + defer C.CFRelease(vlib) + funcName := C.CString("main0") + defer C.free(unsafe.Pointer(funcName)) + f := C.libraryNewFunction(vlib, funcName) + if f == 0 { + return nil, fmt.Errorf("metal: main function not found in %q", src.Name) + } + return &Shader{function: f, inputs: src.Inputs}, nil +} + +func (b *Backend) Viewport(x, y, width, height int) { + enc := b.renderEnc + if enc == 0 { + panic("no active render pass") + } + C.renderEncViewport(enc, C.MTLViewport{ + originX: 
C.double(x), + originY: C.double(y), + width: C.double(width), + height: C.double(height), + znear: 0.0, + zfar: 1.0, + }) +} + +func (b *Backend) DrawArrays(off, count int) { + enc := b.renderEnc + if enc == 0 { + panic("no active render pass") + } + C.renderEncDrawPrimitives(enc, b.topology, C.NSUInteger(off), C.NSUInteger(count)) +} + +func (b *Backend) DrawElements(off, count int) { + enc := b.renderEnc + if enc == 0 { + panic("no active render pass") + } + C.renderEncDrawIndexedPrimitives(enc, b.topology, b.indexBuf.buffer, C.NSUInteger(off), C.NSUInteger(count)) +} + +func primitiveFor(mode driver.Topology) C.MTLPrimitiveType { + switch mode { + case driver.TopologyTriangles: + return C.MTLPrimitiveTypeTriangle + case driver.TopologyTriangleStrip: + return C.MTLPrimitiveTypeTriangleStrip + default: + panic("metal: unknown draw mode") + } +} + +func (b *Backend) BindImageTexture(unit int, tex driver.Texture) { + b.BindTexture(unit, tex) +} + +func (b *Backend) BeginCompute() { + b.endEncoder() + b.ensureCmdBuffer() + for i := range b.bufSizes { + b.bufSizes[i] = 0 + } + b.computeEnc = C.cmdBufferComputeEncoder(b.cmdBuffer) + if b.computeEnc == 0 { + panic("metal: [MTLCommandBuffer computeCommandEncoder:] failed") + } +} + +func (b *Backend) EndCompute() { + if b.computeEnc == 0 { + panic("no active compute pass") + } + C.computeEncEnd(b.computeEnc) + C.CFRelease(b.computeEnc) + b.computeEnc = 0 +} + +func (b *Backend) DispatchCompute(x, y, z int) { + enc := b.computeEnc + if enc == 0 { + panic("no active compute pass") + } + C.computeEncSetBytes(enc, unsafe.Pointer(&b.bufSizes[0]), C.NSUInteger(len(b.bufSizes)*4), spvBufferSizeConstantsBinding) + threadgroupsPerGrid := C.MTLSize{ + width: C.NSUInteger(x), height: C.NSUInteger(y), depth: C.NSUInteger(z), + } + sz := b.prog.groupSize + threadsPerThreadgroup := C.MTLSize{ + width: C.NSUInteger(sz[0]), height: C.NSUInteger(sz[1]), depth: C.NSUInteger(sz[2]), + } + C.computeEncDispatch(enc, threadgroupsPerGrid, 
threadsPerThreadgroup) +} + +func (b *Backend) stagingBuffer(size int) (C.CFTypeRef, int) { + if b.stagingBuf == 0 || b.stagingOff+size > len(bufferStore(b.stagingBuf)) { + if b.stagingBuf != 0 { + C.CFRelease(b.stagingBuf) + } + cap := 2 * (b.stagingOff + size) + b.stagingBuf = C.newBuffer(b.dev, C.NSUInteger(cap), C.MTLResourceStorageModeShared|C.MTLResourceCPUCacheModeWriteCombined) + if b.stagingBuf == 0 { + panic(fmt.Errorf("metal: failed to allocate %d bytes of staging buffer", cap)) + } + b.stagingOff = 0 + } + off := b.stagingOff + b.stagingOff += size + return b.stagingBuf, off +} + +func (t *Texture) Upload(offset, size image.Point, pixels []byte, stride int) { + if len(pixels) == 0 { + return + } + if stride == 0 { + stride = size.X * 4 + } + dstStride := size.X * 4 + n := size.Y * dstStride + buf, off := t.backend.stagingBuffer(n) + store := bufferSlice(buf, off, n) + var srcOff, dstOff int + for y := 0; y < size.Y; y++ { + srcRow := pixels[srcOff : srcOff+dstStride] + dstRow := store[dstOff : dstOff+dstStride] + copy(dstRow, srcRow) + dstOff += dstStride + srcOff += stride + } + enc := t.backend.startBlit() + orig := C.MTLOrigin{ + x: C.NSUInteger(offset.X), + y: C.NSUInteger(offset.Y), + } + msize := C.MTLSize{ + width: C.NSUInteger(size.X), + height: C.NSUInteger(size.Y), + depth: 1, + } + C.blitEncCopyBufferToTexture(enc, buf, t.texture, C.NSUInteger(off), C.NSUInteger(dstStride), C.NSUInteger(len(store)), msize, orig) +} + +func (t *Texture) Release() { + if t.foreign { + panic("metal: release of external texture") + } + C.CFRelease(t.texture) + C.CFRelease(t.sampler) + *t = Texture{} +} + +func (p *Pipeline) Release() { + C.CFRelease(p.pipeline) + *p = Pipeline{} +} + +func (b *Backend) PrepareTexture(tex driver.Texture) {} + +func (b *Backend) BindTexture(unit int, tex driver.Texture) { + t := tex.(*Texture) + if enc := b.renderEnc; enc != 0 { + C.renderEncSetFragmentTexture(enc, C.NSUInteger(unit), t.texture) + 
C.renderEncSetFragmentSamplerState(enc, C.NSUInteger(unit), t.sampler) + } else if enc := b.computeEnc; enc != 0 { + C.computeEncSetTexture(enc, C.NSUInteger(unit), t.texture) + } else { + panic("no active render nor compute pass") + } +} + +func (b *Backend) ensureCmdBuffer() { + if b.cmdBuffer != 0 { + return + } + b.cmdBuffer = C.queueNewBuffer(b.queue) + if b.cmdBuffer == 0 { + panic("metal: [MTLCommandQueue cmdBuffer] failed") + } +} + +func (b *Backend) BindPipeline(pipe driver.Pipeline) { + p := pipe.(*Pipeline) + enc := b.renderEnc + if enc == 0 { + panic("no active render pass") + } + C.renderEncSetRenderPipelineState(enc, p.pipeline) + b.topology = p.topology +} + +func (b *Backend) BindProgram(prog driver.Program) { + enc := b.computeEnc + if enc == 0 { + panic("no active compute pass") + } + p := prog.(*Program) + C.computeEncSetPipeline(enc, p.pipeline) + b.prog = p +} + +func (s *Shader) Release() { + C.CFRelease(s.function) + *s = Shader{} +} + +func (p *Program) Release() { + C.CFRelease(p.pipeline) + *p = Program{} +} + +func (b *Backend) BindStorageBuffer(binding int, buffer driver.Buffer) { + buf := buffer.(*Buffer) + b.bufSizes[binding] = uint32(buf.size) + enc := b.computeEnc + if enc == 0 { + panic("no active compute pass") + } + if buf.buffer != 0 { + C.computeEncSetBuffer(enc, C.NSUInteger(binding), buf.buffer) + } else if buf.size > 0 { + C.computeEncSetBytes(enc, unsafe.Pointer(&buf.store[0]), C.NSUInteger(buf.size), C.NSUInteger(binding)) + } +} + +func (b *Backend) BindUniforms(buf driver.Buffer) { + bf := buf.(*Buffer) + enc := b.renderEnc + if enc == 0 { + panic("no active render pass") + } + if bf.buffer != 0 { + C.renderEncSetVertexBuffer(enc, bf.buffer, uniformBufferIndex, 0) + C.renderEncSetFragmentBuffer(enc, bf.buffer, uniformBufferIndex, 0) + } else if bf.size > 0 { + C.renderEncSetVertexBytes(enc, unsafe.Pointer(&bf.store[0]), C.NSUInteger(bf.size), uniformBufferIndex) + C.renderEncSetFragmentBytes(enc, 
unsafe.Pointer(&bf.store[0]), C.NSUInteger(bf.size), uniformBufferIndex) + } +} + +func (b *Backend) BindVertexBuffer(buf driver.Buffer, offset int) { + bf := buf.(*Buffer) + enc := b.renderEnc + if enc == 0 { + panic("no active render pass") + } + if bf.buffer != 0 { + C.renderEncSetVertexBuffer(enc, bf.buffer, attributeBufferIndex, C.NSUInteger(offset)) + } else if n := bf.size - offset; n > 0 { + C.renderEncSetVertexBytes(enc, unsafe.Pointer(&bf.store[offset]), C.NSUInteger(n), attributeBufferIndex) + } +} + +func (b *Backend) BindIndexBuffer(buf driver.Buffer) { + b.indexBuf = buf.(*Buffer) +} + +func (b *Buffer) Download(data []byte) error { + if len(data) > b.size { + panic(fmt.Errorf("len(data) (%d) larger than len(content) (%d)", len(data), b.size)) + } + buf, off := b.backend.stagingBuffer(len(data)) + enc := b.backend.startBlit() + C.blitEncCopyBufferToBuffer(enc, b.buffer, buf, 0, C.NSUInteger(off), C.NSUInteger(len(data))) + b.backend.endCmdBuffer(true) + store := bufferSlice(buf, off, len(data)) + copy(data, store) + return nil +} + +func (b *Buffer) Upload(data []byte) { + if len(data) > b.size { + panic(fmt.Errorf("len(data) (%d) larger than len(content) (%d)", len(data), b.size)) + } + if b.buffer == 0 { + copy(b.store, data) + return + } + buf, off := b.backend.stagingBuffer(len(data)) + store := bufferSlice(buf, off, len(data)) + copy(store, data) + enc := b.backend.startBlit() + C.blitEncCopyBufferToBuffer(enc, buf, b.buffer, C.NSUInteger(off), 0, C.NSUInteger(len(store))) +} + +func bufferStore(buf C.CFTypeRef) []byte { + contents := C.bufferContents(buf) + return (*(*[1 << 30]byte)(contents.addr))[:contents.size:contents.size] +} + +func bufferSlice(buf C.CFTypeRef, off, len int) []byte { + store := bufferStore(buf) + return store[off : off+len] +} + +func (b *Buffer) Release() { + if b.buffer != 0 { + C.CFRelease(b.buffer) + } + *b = Buffer{} +} + +func (t *Texture) ReadPixels(src image.Rectangle, pixels []byte, stride int) error { + if 
len(pixels) == 0 { + return nil + } + sz := src.Size() + orig := C.MTLOrigin{ + x: C.NSUInteger(src.Min.X), + y: C.NSUInteger(src.Min.Y), + } + msize := C.MTLSize{ + width: C.NSUInteger(sz.X), + height: C.NSUInteger(sz.Y), + depth: 1, + } + stageStride := sz.X * 4 + n := sz.Y * stageStride + buf, off := t.backend.stagingBuffer(n) + enc := t.backend.startBlit() + C.blitEncCopyTextureToBuffer(enc, t.texture, buf, C.NSUInteger(off), C.NSUInteger(stageStride), C.NSUInteger(n), msize, orig) + t.backend.endCmdBuffer(true) + store := bufferSlice(buf, off, n) + var srcOff, dstOff int + for y := 0; y < sz.Y; y++ { + dstRow := pixels[srcOff : srcOff+stageStride] + srcRow := store[dstOff : dstOff+stageStride] + copy(dstRow, srcRow) + dstOff += stageStride + srcOff += stride + } + return nil +} + +func (b *Backend) BeginRenderPass(tex driver.Texture, d driver.LoadDesc) { + b.endEncoder() + b.ensureCmdBuffer() + f := tex.(*Texture) + col := d.ClearColor + var act C.MTLLoadAction + switch d.Action { + case driver.LoadActionKeep: + act = C.MTLLoadActionLoad + case driver.LoadActionClear: + act = C.MTLLoadActionClear + case driver.LoadActionInvalidate: + act = C.MTLLoadActionDontCare + } + b.renderEnc = C.cmdBufferRenderEncoder(b.cmdBuffer, f.texture, act, C.float(col.R), C.float(col.G), C.float(col.B), C.float(col.A)) + if b.renderEnc == 0 { + panic("metal: [MTLCommandBuffer renderCommandEncoderWithDescriptor:] failed") + } +} + +func (b *Backend) EndRenderPass() { + if b.renderEnc == 0 { + panic("no active render pass") + } + C.renderEncEnd(b.renderEnc) + C.CFRelease(b.renderEnc) + b.renderEnc = 0 +} + +func (b *Backend) endEncoder() { + if b.renderEnc != 0 { + panic("active render pass") + } + if b.computeEnc != 0 { + panic("active compute pass") + } + if b.blitEnc != 0 { + C.blitEncEnd(b.blitEnc) + C.CFRelease(b.blitEnc) + b.blitEnc = 0 + } +} + +func (f *Texture) ImplementsRenderTarget() {} diff --git a/vendor/gioui.org/gpu/internal/opengl/opengl.go 
b/vendor/gioui.org/gpu/internal/opengl/opengl.go new file mode 100644 index 0000000..ef89197 --- /dev/null +++ b/vendor/gioui.org/gpu/internal/opengl/opengl.go @@ -0,0 +1,1357 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +package opengl + +import ( + "errors" + "fmt" + "image" + "strings" + "time" + "unsafe" + + "gioui.org/gpu/internal/driver" + "gioui.org/internal/gl" + "gioui.org/shader" +) + +// Backend implements driver.Device. +type Backend struct { + funcs *gl.Functions + + clear bool + glstate glState + state state + savedState glState + sharedCtx bool + + glver [2]int + gles bool + feats driver.Caps + // floatTriple holds the settings for floating point + // textures. + floatTriple textureTriple + // Single channel alpha textures. + alphaTriple textureTriple + srgbaTriple textureTriple + storage [storageBindings]*buffer + + outputFBO gl.Framebuffer + sRGBFBO *SRGBFBO + + // vertArray is bound during a frame. We don't need it, but + // core desktop OpenGL profile 3.3 requires some array bound. + vertArray gl.VertexArray +} + +// State tracking. 
// glState is a shadow copy of the GL state this backend cares about. Each
// state-setting helper compares against these fields and skips the GL call
// when the value is unchanged, avoiding redundant driver work.
type glState struct {
	drawFBO     gl.Framebuffer
	readFBO     gl.Framebuffer
	renderBuf   gl.Renderbuffer
	// vertAttribs mirrors the first 5 vertex attribute slots.
	vertAttribs [5]struct {
		obj        gl.Buffer
		enabled    bool
		size       int
		typ        gl.Enum
		normalized bool
		stride     int
		offset     uintptr
	}
	prog     gl.Program
	// texUnits mirrors the active texture unit and the 2D texture bound to
	// each tracked unit.
	texUnits struct {
		active gl.Enum
		binds  [2]gl.Texture
	}
	arrayBuf  gl.Buffer
	elemBuf   gl.Buffer
	uniBuf    gl.Buffer
	uniBufs   [2]gl.Buffer
	storeBuf  gl.Buffer
	storeBufs [4]gl.Buffer
	vertArray gl.VertexArray
	srgb      bool
	// blend mirrors the blend enable flag and the separate RGB/alpha factors.
	blend struct {
		enable         bool
		srcRGB, dstRGB gl.Enum
		srcA, dstA     gl.Enum
	}
	clearColor        [4]float32
	viewport          [4]int
	unpack_row_length int
	pack_row_length   int
}

// state tracks the driver-level bindings set through the driver.Device
// interface, as opposed to raw GL state.
type state struct {
	pipeline *pipeline
	buffer   bufferBinding
}

// bufferBinding is a vertex buffer together with the byte offset it was
// bound at.
type bufferBinding struct {
	obj    gl.Buffer
	offset int
}

// timer implements driver.Timer using a GL query object.
type timer struct {
	funcs *gl.Functions
	obj   gl.Query
}

// texture implements driver.Texture. A framebuffer for rendering into the
// texture is created lazily (see hasFBO/fbo).
type texture struct {
	backend  *Backend
	obj      gl.Texture
	fbo      gl.Framebuffer
	hasFBO   bool
	triple   textureTriple
	width    int
	height   int
	bindings driver.BufferBinding
	// foreign marks textures owned by the caller (e.g. the frame's render
	// target), which must not be released by this backend.
	foreign bool
}

// pipeline implements driver.Pipeline: a linked program plus the vertex
// layout, blend settings and primitive topology to draw with.
type pipeline struct {
	prog     *program
	inputs   []shader.InputLocation
	layout   driver.VertexLayout
	blend    driver.BlendDesc
	topology driver.Topology
}

// buffer implements driver.Buffer. Uniform buffers are emulated in CPU
// memory (data); other bindings are backed by a GL buffer object (obj).
type buffer struct {
	backend   *Backend
	hasBuffer bool
	obj       gl.Buffer
	typ       driver.BufferBinding
	size      int
	immutable bool
	// For emulation of uniform buffers.
	data []byte
}

// glshader is a compiled vertex or fragment shader along with its source
// metadata, needed later for input binding and uniform setup.
type glshader struct {
	backend *Backend
	obj     gl.Shader
	src     shader.Sources
}

// program implements driver.Program; uniforms are uploaded through
// individual glUniform* calls via vertUniforms/fragUniforms.
type program struct {
	backend      *Backend
	obj          gl.Program
	vertUniforms uniforms
	fragUniforms uniforms
}

// uniforms maps a linear uniform byte buffer onto individual uniform
// locations of a program.
type uniforms struct {
	locs []uniformLocation
	size int
}

// uniformLocation describes one uniform: its GL location and where its
// value lives in the emulated uniform buffer.
type uniformLocation struct {
	uniform gl.Uniform
	offset  int
	typ     shader.DataType
	size    int
}

// inputLayout pairs shader input locations with their vertex data
// descriptions.
type inputLayout struct {
	inputs []shader.InputLocation
	layout []driver.InputDesc
}

// textureTriple holds the type settings for
// a TexImage2D call.
type textureTriple struct {
	internalFormat gl.Enum
	format         gl.Enum
	typ            gl.Enum
}

const (
	// storageBindings is the number of shader storage buffer binding points
	// tracked in Backend.storage.
	storageBindings = 32
)

func init() {
	driver.NewOpenGLDevice = newOpenGLDevice
}

// Supporting compute programs is theoretically possible with OpenGL ES 3.1. In
// practice, there are too many driver issues, especially on Android (e.g.
// Google Pixel, Samsung J2 are both broken in different ways). Disable support
// and rely on Vulkan for devices that support it, and the CPU fallback for
// devices that don't.
const brokenGLES31 = true

// newOpenGLDevice creates a driver.Device backed by the OpenGL (ES) context
// provided in api, probing the context's version and extensions for
// supported features.
func newOpenGLDevice(api driver.OpenGL) (driver.Device, error) {
	f, err := gl.NewFunctions(api.Context, api.ES)
	if err != nil {
		return nil, err
	}
	exts := strings.Split(f.GetString(gl.EXTENSIONS), " ")
	glVer := f.GetString(gl.VERSION)
	ver, gles, err := gl.ParseGLVersion(glVer)
	if err != nil {
		return nil, err
	}
	floatTriple, ffboErr := floatTripleFor(f, ver, exts)
	srgbaTriple, srgbErr := srgbaTripleFor(ver, exts)
	gles31 := gles && (ver[0] > 3 || (ver[0] == 3 && ver[1] >= 1))
	b := &Backend{
		glver:       ver,
		gles:        gles,
		funcs:       f,
		floatTriple: floatTriple,
		alphaTriple: alphaTripleFor(ver),
		srgbaTriple: srgbaTriple,
		sharedCtx:   api.Shared,
	}
	b.feats.BottomLeftOrigin = true
	// Feature flags are granted only when the corresponding probe succeeded.
	if srgbErr == nil {
		b.feats.Features |= driver.FeatureSRGB
	}
	if ffboErr == nil {
		b.feats.Features |= driver.FeatureFloatRenderTargets
	}
	// Compute stays disabled while brokenGLES31 is set; see above.
	if gles31 && !brokenGLES31 {
		b.feats.Features |= driver.FeatureCompute
	}
	if hasExtension(exts, "GL_EXT_disjoint_timer_query_webgl2") || hasExtension(exts, "GL_EXT_disjoint_timer_query") {
		b.feats.Features |= driver.FeatureTimers
	}
	b.feats.MaxTextureSize = f.GetInteger(gl.MAX_TEXTURE_SIZE)
	if !b.sharedCtx {
		// We have exclusive access to the context, so query the GL state once
		// instead of at each frame.
+ b.glstate = b.queryState() + } + return b, nil +} + +func (b *Backend) BeginFrame(target driver.RenderTarget, clear bool, viewport image.Point) driver.Texture { + b.clear = clear + if b.sharedCtx { + b.glstate = b.queryState() + b.savedState = b.glstate + } + b.state = state{} + var renderFBO gl.Framebuffer + if target != nil { + switch t := target.(type) { + case driver.OpenGLRenderTarget: + renderFBO = gl.Framebuffer(t) + case *texture: + renderFBO = t.ensureFBO() + default: + panic(fmt.Errorf("opengl: invalid render target type: %T", target)) + } + } + b.outputFBO = renderFBO + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, renderFBO) + if b.gles { + // If the output framebuffer is not in the sRGB colorspace already, emulate it. + var fbEncoding int + if !renderFBO.Valid() { + fbEncoding = b.funcs.GetFramebufferAttachmentParameteri(gl.FRAMEBUFFER, gl.BACK, gl.FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING) + } else { + fbEncoding = b.funcs.GetFramebufferAttachmentParameteri(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING) + } + if fbEncoding == gl.LINEAR && viewport != (image.Point{}) { + if b.sRGBFBO == nil { + sfbo, err := NewSRGBFBO(b.funcs, &b.glstate) + if err != nil { + panic(err) + } + b.sRGBFBO = sfbo + } + if err := b.sRGBFBO.Refresh(viewport); err != nil { + panic(err) + } + renderFBO = b.sRGBFBO.Framebuffer() + } else if b.sRGBFBO != nil { + b.sRGBFBO.Release() + b.sRGBFBO = nil + } + } else { + b.glstate.set(b.funcs, gl.FRAMEBUFFER_SRGB, true) + if !b.vertArray.Valid() { + b.vertArray = b.funcs.CreateVertexArray() + } + b.glstate.bindVertexArray(b.funcs, b.vertArray) + } + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, renderFBO) + if b.sRGBFBO != nil && !clear { + b.clearOutput(0, 0, 0, 0) + } + return &texture{backend: b, fbo: renderFBO, hasFBO: true, foreign: true} +} + +func (b *Backend) EndFrame() { + if b.sRGBFBO != nil { + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, b.outputFBO) + if b.clear { + 
b.SetBlend(false) + } else { + b.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha) + b.SetBlend(true) + } + b.sRGBFBO.Blit() + } + if b.sharedCtx { + b.restoreState(b.savedState) + } +} + +func (b *Backend) queryState() glState { + s := glState{ + prog: gl.Program(b.funcs.GetBinding(gl.CURRENT_PROGRAM)), + arrayBuf: gl.Buffer(b.funcs.GetBinding(gl.ARRAY_BUFFER_BINDING)), + elemBuf: gl.Buffer(b.funcs.GetBinding(gl.ELEMENT_ARRAY_BUFFER_BINDING)), + drawFBO: gl.Framebuffer(b.funcs.GetBinding(gl.FRAMEBUFFER_BINDING)), + clearColor: b.funcs.GetFloat4(gl.COLOR_CLEAR_VALUE), + viewport: b.funcs.GetInteger4(gl.VIEWPORT), + unpack_row_length: b.funcs.GetInteger(gl.UNPACK_ROW_LENGTH), + pack_row_length: b.funcs.GetInteger(gl.PACK_ROW_LENGTH), + } + s.blend.enable = b.funcs.IsEnabled(gl.BLEND) + s.blend.srcRGB = gl.Enum(b.funcs.GetInteger(gl.BLEND_SRC_RGB)) + s.blend.dstRGB = gl.Enum(b.funcs.GetInteger(gl.BLEND_DST_RGB)) + s.blend.srcA = gl.Enum(b.funcs.GetInteger(gl.BLEND_SRC_ALPHA)) + s.blend.dstA = gl.Enum(b.funcs.GetInteger(gl.BLEND_DST_ALPHA)) + s.texUnits.active = gl.Enum(b.funcs.GetInteger(gl.ACTIVE_TEXTURE)) + if !b.gles { + s.srgb = b.funcs.IsEnabled(gl.FRAMEBUFFER_SRGB) + } + if !b.gles || b.glver[0] >= 3 { + s.vertArray = gl.VertexArray(b.funcs.GetBinding(gl.VERTEX_ARRAY_BINDING)) + s.readFBO = gl.Framebuffer(b.funcs.GetBinding(gl.READ_FRAMEBUFFER_BINDING)) + s.uniBuf = gl.Buffer(b.funcs.GetBinding(gl.UNIFORM_BUFFER_BINDING)) + for i := range s.uniBufs { + s.uniBufs[i] = gl.Buffer(b.funcs.GetBindingi(gl.UNIFORM_BUFFER_BINDING, i)) + } + } + if b.gles && (b.glver[0] > 3 || (b.glver[0] == 3 && b.glver[1] >= 1)) { + s.storeBuf = gl.Buffer(b.funcs.GetBinding(gl.SHADER_STORAGE_BUFFER_BINDING)) + for i := range s.storeBufs { + s.storeBufs[i] = gl.Buffer(b.funcs.GetBindingi(gl.SHADER_STORAGE_BUFFER_BINDING, i)) + } + } + for i := range s.texUnits.binds { + s.activeTexture(b.funcs, gl.TEXTURE0+gl.Enum(i)) + s.texUnits.binds[i] = 
gl.Texture(b.funcs.GetBinding(gl.TEXTURE_BINDING_2D)) + } + for i := range s.vertAttribs { + a := &s.vertAttribs[i] + a.enabled = b.funcs.GetVertexAttrib(i, gl.VERTEX_ATTRIB_ARRAY_ENABLED) != gl.FALSE + a.obj = gl.Buffer(b.funcs.GetVertexAttribBinding(i, gl.VERTEX_ATTRIB_ARRAY_ENABLED)) + a.size = b.funcs.GetVertexAttrib(i, gl.VERTEX_ATTRIB_ARRAY_SIZE) + a.typ = gl.Enum(b.funcs.GetVertexAttrib(i, gl.VERTEX_ATTRIB_ARRAY_TYPE)) + a.normalized = b.funcs.GetVertexAttrib(i, gl.VERTEX_ATTRIB_ARRAY_NORMALIZED) != gl.FALSE + a.stride = b.funcs.GetVertexAttrib(i, gl.VERTEX_ATTRIB_ARRAY_STRIDE) + a.offset = b.funcs.GetVertexAttribPointer(i, gl.VERTEX_ATTRIB_ARRAY_POINTER) + } + return s +} + +func (b *Backend) restoreState(dst glState) { + src := b.glstate + f := b.funcs + for i, unit := range dst.texUnits.binds { + src.bindTexture(f, i, unit) + } + src.activeTexture(f, dst.texUnits.active) + src.bindFramebuffer(f, gl.FRAMEBUFFER, dst.drawFBO) + src.bindFramebuffer(f, gl.READ_FRAMEBUFFER, dst.readFBO) + src.set(f, gl.BLEND, dst.blend.enable) + bf := dst.blend + src.setBlendFuncSeparate(f, bf.srcRGB, bf.dstRGB, bf.srcA, bf.dstA) + src.set(f, gl.FRAMEBUFFER_SRGB, dst.srgb) + src.bindVertexArray(f, dst.vertArray) + src.useProgram(f, dst.prog) + src.bindBuffer(f, gl.ELEMENT_ARRAY_BUFFER, dst.elemBuf) + for i, b := range dst.uniBufs { + src.bindBufferBase(f, gl.UNIFORM_BUFFER, i, b) + } + src.bindBuffer(f, gl.UNIFORM_BUFFER, dst.uniBuf) + for i, b := range dst.storeBufs { + src.bindBufferBase(f, gl.SHADER_STORAGE_BUFFER, i, b) + } + src.bindBuffer(f, gl.SHADER_STORAGE_BUFFER, dst.storeBuf) + col := dst.clearColor + src.setClearColor(f, col[0], col[1], col[2], col[3]) + for i, attr := range dst.vertAttribs { + src.setVertexAttribArray(f, i, attr.enabled) + src.vertexAttribPointer(f, attr.obj, i, attr.size, attr.typ, attr.normalized, attr.stride, int(attr.offset)) + } + src.bindBuffer(f, gl.ARRAY_BUFFER, dst.arrayBuf) + v := dst.viewport + src.setViewport(f, v[0], v[1], v[2], v[3]) 
	src.pixelStorei(f, gl.UNPACK_ROW_LENGTH, dst.unpack_row_length)
	src.pixelStorei(f, gl.PACK_ROW_LENGTH, dst.pack_row_length)
}

// setVertexAttribArray enables or disables vertex attribute idx, skipping
// the GL call if the tracked state already matches.
func (s *glState) setVertexAttribArray(f *gl.Functions, idx int, enabled bool) {
	a := &s.vertAttribs[idx]
	if enabled != a.enabled {
		if enabled {
			f.EnableVertexAttribArray(gl.Attrib(idx))
		} else {
			f.DisableVertexAttribArray(gl.Attrib(idx))
		}
		a.enabled = enabled
	}
}

// vertexAttribPointer binds buf as the array buffer and points attribute idx
// at it. The pointer is always (re)set; only the buffer bind is elided.
func (s *glState) vertexAttribPointer(f *gl.Functions, buf gl.Buffer, idx, size int, typ gl.Enum, normalized bool, stride, offset int) {
	s.bindBuffer(f, gl.ARRAY_BUFFER, buf)
	a := &s.vertAttribs[idx]
	a.obj = buf
	a.size = size
	a.typ = typ
	a.normalized = normalized
	a.stride = stride
	a.offset = uintptr(offset)
	f.VertexAttribPointer(gl.Attrib(idx), a.size, a.typ, a.normalized, a.stride, int(a.offset))
}

// activeTexture switches the active texture unit if it differs from the
// tracked one.
func (s *glState) activeTexture(f *gl.Functions, unit gl.Enum) {
	if unit != s.texUnits.active {
		f.ActiveTexture(unit)
		s.texUnits.active = unit
	}
}

// bindRenderbuffer binds r if it is not already bound.
// NOTE(review): the target parameter is ignored; the call hardcodes
// gl.RENDERBUFFER (the only valid renderbuffer target).
func (s *glState) bindRenderbuffer(f *gl.Functions, target gl.Enum, r gl.Renderbuffer) {
	if !r.Equal(s.renderBuf) {
		f.BindRenderbuffer(gl.RENDERBUFFER, r)
		s.renderBuf = r
	}
}

// bindTexture binds t to texture unit unit, activating the unit first and
// eliding the bind if unchanged.
func (s *glState) bindTexture(f *gl.Functions, unit int, t gl.Texture) {
	s.activeTexture(f, gl.TEXTURE0+gl.Enum(unit))
	if !t.Equal(s.texUnits.binds[unit]) {
		f.BindTexture(gl.TEXTURE_2D, t)
		s.texUnits.binds[unit] = t
	}
}

// bindVertexArray binds a if it differs from the tracked vertex array.
func (s *glState) bindVertexArray(f *gl.Functions, a gl.VertexArray) {
	if !a.Equal(s.vertArray) {
		f.BindVertexArray(a)
		s.vertArray = a
	}
}

// deleteRenderbuffer deletes r and clears the tracked binding if it
// referenced r, so the stale handle is never compared against again.
func (s *glState) deleteRenderbuffer(f *gl.Functions, r gl.Renderbuffer) {
	f.DeleteRenderbuffer(r)
	if r.Equal(s.renderBuf) {
		s.renderBuf = gl.Renderbuffer{}
	}
}

// deleteFramebuffer deletes fbo and clears any tracked draw/read bindings
// that referenced it.
func (s *glState) deleteFramebuffer(f *gl.Functions, fbo gl.Framebuffer) {
	f.DeleteFramebuffer(fbo)
	if fbo.Equal(s.drawFBO) {
		s.drawFBO = gl.Framebuffer{}
	}
	if fbo.Equal(s.readFBO) {
		s.readFBO = gl.Framebuffer{}
	}
}

// deleteBuffer deletes b and clears every tracked binding that referenced it.
func (s
*glState) deleteBuffer(f *gl.Functions, b gl.Buffer) {
	f.DeleteBuffer(b)
	if b.Equal(s.arrayBuf) {
		s.arrayBuf = gl.Buffer{}
	}
	if b.Equal(s.elemBuf) {
		s.elemBuf = gl.Buffer{}
	}
	if b.Equal(s.uniBuf) {
		s.uniBuf = gl.Buffer{}
	}
	if b.Equal(s.storeBuf) {
		// Fix: this previously cleared s.uniBuf (copy-paste error), leaving
		// the deleted buffer tracked in storeBuf and so eliding a future
		// rebind of a fresh buffer that happens to reuse the handle.
		s.storeBuf = gl.Buffer{}
	}
	for i, b2 := range s.storeBufs {
		if b.Equal(b2) {
			s.storeBufs[i] = gl.Buffer{}
		}
	}
	for i, b2 := range s.uniBufs {
		if b.Equal(b2) {
			s.uniBufs[i] = gl.Buffer{}
		}
	}
}

// deleteProgram deletes p and clears the tracked current program if it
// referenced p.
func (s *glState) deleteProgram(f *gl.Functions, p gl.Program) {
	f.DeleteProgram(p)
	if p.Equal(s.prog) {
		s.prog = gl.Program{}
	}
}

// deleteVertexArray deletes a and clears the tracked binding if it
// referenced a.
func (s *glState) deleteVertexArray(f *gl.Functions, a gl.VertexArray) {
	f.DeleteVertexArray(a)
	if a.Equal(s.vertArray) {
		s.vertArray = gl.VertexArray{}
	}
}

// deleteTexture deletes t and clears it from every tracked texture unit.
func (s *glState) deleteTexture(f *gl.Functions, t gl.Texture) {
	f.DeleteTexture(t)
	binds := &s.texUnits.binds
	for i, obj := range binds {
		if t.Equal(obj) {
			binds[i] = gl.Texture{}
		}
	}
}

// useProgram makes p the current program, eliding the call if unchanged.
func (s *glState) useProgram(f *gl.Functions, p gl.Program) {
	if !p.Equal(s.prog) {
		f.UseProgram(p)
		s.prog = p
	}
}

// bindFramebuffer binds fbo to target, tracking that gl.FRAMEBUFFER aliases
// both the draw and read bindings.
func (s *glState) bindFramebuffer(f *gl.Functions, target gl.Enum, fbo gl.Framebuffer) {
	switch target {
	case gl.FRAMEBUFFER:
		if fbo.Equal(s.drawFBO) && fbo.Equal(s.readFBO) {
			return
		}
		s.drawFBO = fbo
		s.readFBO = fbo
	case gl.READ_FRAMEBUFFER:
		if fbo.Equal(s.readFBO) {
			return
		}
		s.readFBO = fbo
	case gl.DRAW_FRAMEBUFFER:
		if fbo.Equal(s.drawFBO) {
			return
		}
		s.drawFBO = fbo
	default:
		panic("unknown target")
	}
	f.BindFramebuffer(target, fbo)
}

// bindBufferBase binds buf to indexed binding point idx of target, tracking
// that BindBufferBase also sets the generic (non-indexed) binding.
func (s *glState) bindBufferBase(f *gl.Functions, target gl.Enum, idx int, buf gl.Buffer) {
	switch target {
	case gl.UNIFORM_BUFFER:
		if buf.Equal(s.uniBuf) && buf.Equal(s.uniBufs[idx]) {
			return
		}
		s.uniBuf = buf
		s.uniBufs[idx] = buf
	case gl.SHADER_STORAGE_BUFFER:
		if buf.Equal(s.storeBuf) && buf.Equal(s.storeBufs[idx]) {
			return
		}
		s.storeBuf = buf
s.storeBufs[idx] = buf + default: + panic("unknown buffer target") + } + f.BindBufferBase(target, idx, buf) +} + +func (s *glState) bindBuffer(f *gl.Functions, target gl.Enum, buf gl.Buffer) { + switch target { + case gl.ARRAY_BUFFER: + if buf.Equal(s.arrayBuf) { + return + } + s.arrayBuf = buf + case gl.ELEMENT_ARRAY_BUFFER: + if buf.Equal(s.elemBuf) { + return + } + s.elemBuf = buf + case gl.UNIFORM_BUFFER: + if buf.Equal(s.uniBuf) { + return + } + s.uniBuf = buf + case gl.SHADER_STORAGE_BUFFER: + if buf.Equal(s.storeBuf) { + return + } + s.storeBuf = buf + default: + panic("unknown buffer target") + } + f.BindBuffer(target, buf) +} + +func (s *glState) pixelStorei(f *gl.Functions, pname gl.Enum, val int) { + switch pname { + case gl.UNPACK_ROW_LENGTH: + if val == s.unpack_row_length { + return + } + s.unpack_row_length = val + case gl.PACK_ROW_LENGTH: + if val == s.pack_row_length { + return + } + s.pack_row_length = val + default: + panic("unsupported PixelStorei pname") + } + f.PixelStorei(pname, val) +} + +func (s *glState) setClearColor(f *gl.Functions, r, g, b, a float32) { + col := [4]float32{r, g, b, a} + if col != s.clearColor { + f.ClearColor(r, g, b, a) + s.clearColor = col + } +} + +func (s *glState) setViewport(f *gl.Functions, x, y, width, height int) { + view := [4]int{x, y, width, height} + if view != s.viewport { + f.Viewport(x, y, width, height) + s.viewport = view + } +} + +func (s *glState) setBlendFuncSeparate(f *gl.Functions, srcRGB, dstRGB, srcA, dstA gl.Enum) { + if srcRGB != s.blend.srcRGB || dstRGB != s.blend.dstRGB || srcA != s.blend.srcA || dstA != s.blend.dstA { + s.blend.srcRGB = srcRGB + s.blend.dstRGB = dstRGB + s.blend.srcA = srcA + s.blend.dstA = dstA + f.BlendFuncSeparate(srcA, dstA, srcA, dstA) + } +} + +func (s *glState) set(f *gl.Functions, target gl.Enum, enable bool) { + switch target { + case gl.FRAMEBUFFER_SRGB: + if s.srgb == enable { + return + } + s.srgb = enable + case gl.BLEND: + if enable == s.blend.enable { + 
return + } + s.blend.enable = enable + default: + panic("unknown enable") + } + if enable { + f.Enable(target) + } else { + f.Disable(target) + } +} + +func (b *Backend) Caps() driver.Caps { + return b.feats +} + +func (b *Backend) NewTimer() driver.Timer { + return &timer{ + funcs: b.funcs, + obj: b.funcs.CreateQuery(), + } +} + +func (b *Backend) IsTimeContinuous() bool { + return b.funcs.GetInteger(gl.GPU_DISJOINT_EXT) == gl.FALSE +} + +func (t *texture) ensureFBO() gl.Framebuffer { + if t.hasFBO { + return t.fbo + } + b := t.backend + oldFBO := b.glstate.drawFBO + defer func() { + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, oldFBO) + }() + glErr(b.funcs) + fb := b.funcs.CreateFramebuffer() + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, fb) + if err := glErr(b.funcs); err != nil { + b.funcs.DeleteFramebuffer(fb) + panic(err) + } + b.funcs.FramebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, t.obj, 0) + if st := b.funcs.CheckFramebufferStatus(gl.FRAMEBUFFER); st != gl.FRAMEBUFFER_COMPLETE { + b.funcs.DeleteFramebuffer(fb) + panic(fmt.Errorf("incomplete framebuffer, status = 0x%x, err = %d", st, b.funcs.GetError())) + } + t.fbo = fb + t.hasFBO = true + return fb +} + +func (b *Backend) NewTexture(format driver.TextureFormat, width, height int, minFilter, magFilter driver.TextureFilter, binding driver.BufferBinding) (driver.Texture, error) { + glErr(b.funcs) + tex := &texture{backend: b, obj: b.funcs.CreateTexture(), width: width, height: height, bindings: binding} + switch format { + case driver.TextureFormatFloat: + tex.triple = b.floatTriple + case driver.TextureFormatSRGBA: + tex.triple = b.srgbaTriple + case driver.TextureFormatRGBA8: + tex.triple = textureTriple{gl.RGBA8, gl.RGBA, gl.UNSIGNED_BYTE} + default: + return nil, errors.New("unsupported texture format") + } + b.BindTexture(0, tex) + b.funcs.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, toTexFilter(magFilter)) + b.funcs.TexParameteri(gl.TEXTURE_2D, 
gl.TEXTURE_MIN_FILTER, toTexFilter(minFilter)) + b.funcs.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE) + b.funcs.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE) + if b.gles && b.glver[0] >= 3 { + // Immutable textures are required for BindImageTexture, and can't hurt otherwise. + b.funcs.TexStorage2D(gl.TEXTURE_2D, 1, tex.triple.internalFormat, width, height) + } else { + b.funcs.TexImage2D(gl.TEXTURE_2D, 0, tex.triple.internalFormat, width, height, tex.triple.format, tex.triple.typ) + } + if err := glErr(b.funcs); err != nil { + tex.Release() + return nil, err + } + return tex, nil +} + +func (b *Backend) NewBuffer(typ driver.BufferBinding, size int) (driver.Buffer, error) { + glErr(b.funcs) + buf := &buffer{backend: b, typ: typ, size: size} + if typ&driver.BufferBindingUniforms != 0 { + if typ != driver.BufferBindingUniforms { + return nil, errors.New("uniforms buffers cannot be bound as anything else") + } + buf.data = make([]byte, size) + } + if typ&^driver.BufferBindingUniforms != 0 { + buf.hasBuffer = true + buf.obj = b.funcs.CreateBuffer() + if err := glErr(b.funcs); err != nil { + buf.Release() + return nil, err + } + firstBinding := firstBufferType(typ) + b.glstate.bindBuffer(b.funcs, firstBinding, buf.obj) + b.funcs.BufferData(firstBinding, size, gl.DYNAMIC_DRAW, nil) + } + return buf, nil +} + +func (b *Backend) NewImmutableBuffer(typ driver.BufferBinding, data []byte) (driver.Buffer, error) { + glErr(b.funcs) + obj := b.funcs.CreateBuffer() + buf := &buffer{backend: b, obj: obj, typ: typ, size: len(data), hasBuffer: true} + firstBinding := firstBufferType(typ) + b.glstate.bindBuffer(b.funcs, firstBinding, buf.obj) + b.funcs.BufferData(firstBinding, len(data), gl.STATIC_DRAW, data) + buf.immutable = true + if err := glErr(b.funcs); err != nil { + buf.Release() + return nil, err + } + return buf, nil +} + +func glErr(f *gl.Functions) error { + if st := f.GetError(); st != gl.NO_ERROR { + return fmt.Errorf("glGetError: 
%#x", st) + } + return nil +} + +func (b *Backend) Release() { + if b.sRGBFBO != nil { + b.sRGBFBO.Release() + } + if b.vertArray.Valid() { + b.glstate.deleteVertexArray(b.funcs, b.vertArray) + } + *b = Backend{} +} + +func (b *Backend) DispatchCompute(x, y, z int) { + for binding, buf := range b.storage { + if buf != nil { + b.glstate.bindBufferBase(b.funcs, gl.SHADER_STORAGE_BUFFER, binding, buf.obj) + } + } + b.funcs.DispatchCompute(x, y, z) + b.funcs.MemoryBarrier(gl.ALL_BARRIER_BITS) +} + +func (b *Backend) BindImageTexture(unit int, tex driver.Texture) { + t := tex.(*texture) + var acc gl.Enum + switch t.bindings & (driver.BufferBindingShaderStorageRead | driver.BufferBindingShaderStorageWrite) { + case driver.BufferBindingShaderStorageRead: + acc = gl.READ_ONLY + case driver.BufferBindingShaderStorageWrite: + acc = gl.WRITE_ONLY + case driver.BufferBindingShaderStorageRead | driver.BufferBindingShaderStorageWrite: + acc = gl.READ_WRITE + default: + panic("unsupported access bits") + } + b.funcs.BindImageTexture(unit, t.obj, 0, false, 0, acc, t.triple.internalFormat) +} + +func (b *Backend) BlendFunc(sfactor, dfactor driver.BlendFactor) { + src, dst := toGLBlendFactor(sfactor), toGLBlendFactor(dfactor) + b.glstate.setBlendFuncSeparate(b.funcs, src, dst, src, dst) +} + +func toGLBlendFactor(f driver.BlendFactor) gl.Enum { + switch f { + case driver.BlendFactorOne: + return gl.ONE + case driver.BlendFactorOneMinusSrcAlpha: + return gl.ONE_MINUS_SRC_ALPHA + case driver.BlendFactorZero: + return gl.ZERO + case driver.BlendFactorDstColor: + return gl.DST_COLOR + default: + panic("unsupported blend factor") + } +} + +func (b *Backend) SetBlend(enable bool) { + b.glstate.set(b.funcs, gl.BLEND, enable) +} + +func (b *Backend) DrawElements(off, count int) { + b.prepareDraw() + // off is in 16-bit indices, but DrawElements take a byte offset. 
+ byteOff := off * 2 + b.funcs.DrawElements(toGLDrawMode(b.state.pipeline.topology), count, gl.UNSIGNED_SHORT, byteOff) +} + +func (b *Backend) DrawArrays(off, count int) { + b.prepareDraw() + b.funcs.DrawArrays(toGLDrawMode(b.state.pipeline.topology), off, count) +} + +func (b *Backend) prepareDraw() { + p := b.state.pipeline + if p == nil { + return + } + b.setupVertexArrays() +} + +func toGLDrawMode(mode driver.Topology) gl.Enum { + switch mode { + case driver.TopologyTriangleStrip: + return gl.TRIANGLE_STRIP + case driver.TopologyTriangles: + return gl.TRIANGLES + default: + panic("unsupported draw mode") + } +} + +func (b *Backend) Viewport(x, y, width, height int) { + b.glstate.setViewport(b.funcs, x, y, width, height) +} + +func (b *Backend) clearOutput(colR, colG, colB, colA float32) { + b.glstate.setClearColor(b.funcs, colR, colG, colB, colA) + b.funcs.Clear(gl.COLOR_BUFFER_BIT) +} + +func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) { + // We don't support ES 3.1 compute, see brokenGLES31 above. 
+ const GLES31Source = "" + p, err := gl.CreateComputeProgram(b.funcs, GLES31Source) + if err != nil { + return nil, fmt.Errorf("%s: %v", src.Name, err) + } + return &program{ + backend: b, + obj: p, + }, nil +} + +func (b *Backend) NewVertexShader(src shader.Sources) (driver.VertexShader, error) { + glslSrc := b.glslFor(src) + sh, err := gl.CreateShader(b.funcs, gl.VERTEX_SHADER, glslSrc) + return &glshader{backend: b, obj: sh, src: src}, err +} + +func (b *Backend) NewFragmentShader(src shader.Sources) (driver.FragmentShader, error) { + glslSrc := b.glslFor(src) + sh, err := gl.CreateShader(b.funcs, gl.FRAGMENT_SHADER, glslSrc) + return &glshader{backend: b, obj: sh, src: src}, err +} + +func (b *Backend) glslFor(src shader.Sources) string { + if b.gles { + return src.GLSL100ES + } else { + return src.GLSL150 + } +} + +func (b *Backend) NewPipeline(desc driver.PipelineDesc) (driver.Pipeline, error) { + p, err := b.newProgram(desc) + if err != nil { + return nil, err + } + layout := desc.VertexLayout + vsrc := desc.VertexShader.(*glshader).src + if len(vsrc.Inputs) != len(layout.Inputs) { + return nil, fmt.Errorf("opengl: got %d inputs, expected %d", len(layout.Inputs), len(vsrc.Inputs)) + } + for i, inp := range vsrc.Inputs { + if exp, got := inp.Size, layout.Inputs[i].Size; exp != got { + return nil, fmt.Errorf("opengl: data size mismatch for %q: got %d expected %d", inp.Name, got, exp) + } + } + return &pipeline{ + prog: p, + inputs: vsrc.Inputs, + layout: layout, + blend: desc.BlendDesc, + topology: desc.Topology, + }, nil +} + +func (b *Backend) newProgram(desc driver.PipelineDesc) (*program, error) { + p := b.funcs.CreateProgram() + if !p.Valid() { + return nil, errors.New("opengl: glCreateProgram failed") + } + vsh, fsh := desc.VertexShader.(*glshader), desc.FragmentShader.(*glshader) + b.funcs.AttachShader(p, vsh.obj) + b.funcs.AttachShader(p, fsh.obj) + for _, inp := range vsh.src.Inputs { + b.funcs.BindAttribLocation(p, gl.Attrib(inp.Location), inp.Name) 
+ } + b.funcs.LinkProgram(p) + if b.funcs.GetProgrami(p, gl.LINK_STATUS) == 0 { + log := b.funcs.GetProgramInfoLog(p) + b.funcs.DeleteProgram(p) + return nil, fmt.Errorf("opengl: program link failed: %s", strings.TrimSpace(log)) + } + prog := &program{ + backend: b, + obj: p, + } + b.glstate.useProgram(b.funcs, p) + // Bind texture uniforms. + for _, tex := range vsh.src.Textures { + u := b.funcs.GetUniformLocation(p, tex.Name) + if u.Valid() { + b.funcs.Uniform1i(u, tex.Binding) + } + } + for _, tex := range fsh.src.Textures { + u := b.funcs.GetUniformLocation(p, tex.Name) + if u.Valid() { + b.funcs.Uniform1i(u, tex.Binding) + } + } + prog.vertUniforms.setup(b.funcs, p, vsh.src.Uniforms.Size, vsh.src.Uniforms.Locations) + prog.fragUniforms.setup(b.funcs, p, fsh.src.Uniforms.Size, fsh.src.Uniforms.Locations) + return prog, nil +} + +func (b *Backend) BindStorageBuffer(binding int, buf driver.Buffer) { + bf := buf.(*buffer) + if bf.typ&(driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite) == 0 { + panic("not a shader storage buffer") + } + b.storage[binding] = bf +} + +func (b *Backend) BindUniforms(buf driver.Buffer) { + bf := buf.(*buffer) + if bf.typ&driver.BufferBindingUniforms == 0 { + panic("not a uniform buffer") + } + b.state.pipeline.prog.vertUniforms.update(b.funcs, bf) + b.state.pipeline.prog.fragUniforms.update(b.funcs, bf) +} + +func (b *Backend) BindProgram(prog driver.Program) { + p := prog.(*program) + b.glstate.useProgram(b.funcs, p.obj) +} + +func (s *glshader) Release() { + s.backend.funcs.DeleteShader(s.obj) +} + +func (p *program) Release() { + p.backend.glstate.deleteProgram(p.backend.funcs, p.obj) +} + +func (u *uniforms) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []shader.UniformLocation) { + u.locs = make([]uniformLocation, len(uniforms)) + for i, uniform := range uniforms { + loc := funcs.GetUniformLocation(p, uniform.Name) + u.locs[i] = uniformLocation{uniform: loc, offset: uniform.Offset, 
typ: uniform.Type, size: uniform.Size} + } + u.size = uniformSize +} + +func (p *uniforms) update(funcs *gl.Functions, buf *buffer) { + if buf.size < p.size { + panic(fmt.Errorf("uniform buffer too small, got %d need %d", buf.size, p.size)) + } + data := buf.data + for _, u := range p.locs { + if !u.uniform.Valid() { + continue + } + data := data[u.offset:] + switch { + case u.typ == shader.DataTypeFloat && u.size == 1: + data := data[:4] + v := *(*[1]float32)(unsafe.Pointer(&data[0])) + funcs.Uniform1f(u.uniform, v[0]) + case u.typ == shader.DataTypeFloat && u.size == 2: + data := data[:8] + v := *(*[2]float32)(unsafe.Pointer(&data[0])) + funcs.Uniform2f(u.uniform, v[0], v[1]) + case u.typ == shader.DataTypeFloat && u.size == 3: + data := data[:12] + v := *(*[3]float32)(unsafe.Pointer(&data[0])) + funcs.Uniform3f(u.uniform, v[0], v[1], v[2]) + case u.typ == shader.DataTypeFloat && u.size == 4: + data := data[:16] + v := *(*[4]float32)(unsafe.Pointer(&data[0])) + funcs.Uniform4f(u.uniform, v[0], v[1], v[2], v[3]) + default: + panic("unsupported uniform data type or size") + } + } +} + +func (b *buffer) Upload(data []byte) { + if b.immutable { + panic("immutable buffer") + } + if len(data) > b.size { + panic("buffer size overflow") + } + copy(b.data, data) + if b.hasBuffer { + firstBinding := firstBufferType(b.typ) + b.backend.glstate.bindBuffer(b.backend.funcs, firstBinding, b.obj) + if len(data) == b.size { + // the iOS GL implementation doesn't recognize when BufferSubData + // clears the entire buffer. Tell it and avoid GPU stalls. + // See also https://github.com/godotengine/godot/issues/23956. 
+			b.backend.funcs.BufferData(firstBinding, b.size, gl.DYNAMIC_DRAW, data)
+		} else {
+			b.backend.funcs.BufferSubData(firstBinding, 0, data)
+		}
+	}
+}
+
+// Download copies the buffer contents into data. GPU-backed buffers are
+// read back through MapBufferRange; driver.ErrContentLost is returned
+// when GL reports that the unmap failed and the contents are undefined.
+func (b *buffer) Download(data []byte) error {
+	if len(data) > b.size {
+		panic("buffer size overflow")
+	}
+	if !b.hasBuffer {
+		// CPU-only buffer: serve the read from the shadow copy.
+		copy(data, b.data)
+		return nil
+	}
+	firstBinding := firstBufferType(b.typ)
+	b.backend.glstate.bindBuffer(b.backend.funcs, firstBinding, b.obj)
+	bufferMap := b.backend.funcs.MapBufferRange(firstBinding, 0, len(data), gl.MAP_READ_BIT)
+	if bufferMap == nil {
+		return fmt.Errorf("MapBufferRange: error %#x", b.backend.funcs.GetError())
+	}
+	copy(data, bufferMap)
+	if !b.backend.funcs.UnmapBuffer(firstBinding) {
+		return driver.ErrContentLost
+	}
+	return nil
+}
+
+// Release frees the GL buffer object, if any. The CPU shadow copy is
+// left for the garbage collector.
+func (b *buffer) Release() {
+	if b.hasBuffer {
+		b.backend.glstate.deleteBuffer(b.backend.funcs, b.obj)
+		b.hasBuffer = false
+	}
+}
+
+// BindVertexBuffer records buf and offset as the current vertex source;
+// the actual attribute pointers are set up lazily in setupVertexArrays.
+func (b *Backend) BindVertexBuffer(buf driver.Buffer, offset int) {
+	gbuf := buf.(*buffer)
+	if gbuf.typ&driver.BufferBindingVertices == 0 {
+		panic("not a vertex buffer")
+	}
+	b.state.buffer = bufferBinding{obj: gbuf.obj, offset: offset}
+}
+
+// setupVertexArrays configures vertex attribute pointers for the current
+// pipeline's input layout from the vertex buffer recorded by
+// BindVertexBuffer, and enables exactly the attribute arrays in use.
+func (b *Backend) setupVertexArrays() {
+	p := b.state.pipeline
+	inputs := p.inputs
+	if len(inputs) == 0 {
+		return
+	}
+	layout := p.layout
+	const max = len(b.glstate.vertAttribs)
+	var enabled [max]bool
+	buf := b.state.buffer
+	for i, inp := range inputs {
+		l := layout.Inputs[i]
+		var gltyp gl.Enum
+		switch l.Type {
+		case shader.DataTypeFloat:
+			gltyp = gl.FLOAT
+		case shader.DataTypeShort:
+			gltyp = gl.SHORT
+		default:
+			panic("unsupported data type")
+		}
+		enabled[inp.Location] = true
+		b.glstate.vertexAttribPointer(b.funcs, buf.obj, inp.Location, l.Size, gltyp, false, p.layout.Stride, buf.offset+l.Offset)
+	}
+	for i := 0; i < max; i++ {
+		b.glstate.setVertexAttribArray(b.funcs, i, enabled[i])
+	}
+}
+
+// BindIndexBuffer binds buf as the element array buffer for indexed draws.
+func (b *Backend) BindIndexBuffer(buf driver.Buffer) {
+	gbuf := buf.(*buffer)
+	if gbuf.typ&driver.BufferBindingIndices == 0 {
panic("not an index buffer") + } + b.glstate.bindBuffer(b.funcs, gl.ELEMENT_ARRAY_BUFFER, gbuf.obj) +} + +func (b *Backend) CopyTexture(dst driver.Texture, dstOrigin image.Point, src driver.Texture, srcRect image.Rectangle) { + const unit = 0 + oldTex := b.glstate.texUnits.binds[unit] + defer func() { + b.glstate.bindTexture(b.funcs, unit, oldTex) + }() + b.glstate.bindTexture(b.funcs, unit, dst.(*texture).obj) + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, src.(*texture).ensureFBO()) + sz := srcRect.Size() + b.funcs.CopyTexSubImage2D(gl.TEXTURE_2D, 0, dstOrigin.X, dstOrigin.Y, srcRect.Min.X, srcRect.Min.Y, sz.X, sz.Y) +} + +func (t *texture) ReadPixels(src image.Rectangle, pixels []byte, stride int) error { + glErr(t.backend.funcs) + t.backend.glstate.bindFramebuffer(t.backend.funcs, gl.FRAMEBUFFER, t.ensureFBO()) + if len(pixels) < src.Dx()*src.Dy()*4 { + return errors.New("unexpected RGBA size") + } + w, h := src.Dx(), src.Dy() + // WebGL 1 doesn't support PACK_ROW_LENGTH != 0. Avoid it if possible. 
+ rowLen := 0 + if n := stride / 4; n != w { + rowLen = n + } + t.backend.glstate.pixelStorei(t.backend.funcs, gl.PACK_ROW_LENGTH, rowLen) + t.backend.funcs.ReadPixels(src.Min.X, src.Min.Y, w, h, gl.RGBA, gl.UNSIGNED_BYTE, pixels) + return glErr(t.backend.funcs) +} + +func (b *Backend) BindPipeline(pl driver.Pipeline) { + p := pl.(*pipeline) + b.state.pipeline = p + b.glstate.useProgram(b.funcs, p.prog.obj) + b.SetBlend(p.blend.Enable) + b.BlendFunc(p.blend.SrcFactor, p.blend.DstFactor) +} + +func (b *Backend) BeginCompute() { + b.funcs.MemoryBarrier(gl.ALL_BARRIER_BITS) +} + +func (b *Backend) EndCompute() { +} + +func (b *Backend) BeginRenderPass(tex driver.Texture, desc driver.LoadDesc) { + fbo := tex.(*texture).ensureFBO() + b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, fbo) + switch desc.Action { + case driver.LoadActionClear: + c := desc.ClearColor + b.clearOutput(c.R, c.G, c.B, c.A) + case driver.LoadActionInvalidate: + b.funcs.InvalidateFramebuffer(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0) + } +} + +func (b *Backend) EndRenderPass() { +} + +func (f *texture) ImplementsRenderTarget() {} + +func (p *pipeline) Release() { + p.prog.Release() + *p = pipeline{} +} + +func toTexFilter(f driver.TextureFilter) int { + switch f { + case driver.FilterNearest: + return gl.NEAREST + case driver.FilterLinear: + return gl.LINEAR + default: + panic("unsupported texture filter") + } +} + +func (b *Backend) PrepareTexture(tex driver.Texture) {} + +func (b *Backend) BindTexture(unit int, t driver.Texture) { + b.glstate.bindTexture(b.funcs, unit, t.(*texture).obj) +} + +func (t *texture) Release() { + if t.foreign { + panic("texture not created by NewTexture") + } + if t.hasFBO { + t.backend.glstate.deleteFramebuffer(t.backend.funcs, t.fbo) + } + t.backend.glstate.deleteTexture(t.backend.funcs, t.obj) +} + +func (t *texture) Upload(offset, size image.Point, pixels []byte, stride int) { + if min := size.X * size.Y * 4; min > len(pixels) { + panic(fmt.Errorf("size %d larger 
than data %d", min, len(pixels))) + } + t.backend.BindTexture(0, t) + // WebGL 1 doesn't support UNPACK_ROW_LENGTH != 0. Avoid it if possible. + rowLen := 0 + if n := stride / 4; n != size.X { + rowLen = n + } + t.backend.glstate.pixelStorei(t.backend.funcs, gl.UNPACK_ROW_LENGTH, rowLen) + t.backend.funcs.TexSubImage2D(gl.TEXTURE_2D, 0, offset.X, offset.Y, size.X, size.Y, t.triple.format, t.triple.typ, pixels) +} + +func (t *timer) Begin() { + t.funcs.BeginQuery(gl.TIME_ELAPSED_EXT, t.obj) +} + +func (t *timer) End() { + t.funcs.EndQuery(gl.TIME_ELAPSED_EXT) +} + +func (t *timer) ready() bool { + return t.funcs.GetQueryObjectuiv(t.obj, gl.QUERY_RESULT_AVAILABLE) == gl.TRUE +} + +func (t *timer) Release() { + t.funcs.DeleteQuery(t.obj) +} + +func (t *timer) Duration() (time.Duration, bool) { + if !t.ready() { + return 0, false + } + nanos := t.funcs.GetQueryObjectuiv(t.obj, gl.QUERY_RESULT) + return time.Duration(nanos), true +} + +// floatTripleFor determines the best texture triple for floating point FBOs. +func floatTripleFor(f *gl.Functions, ver [2]int, exts []string) (textureTriple, error) { + var triples []textureTriple + if ver[0] >= 3 { + triples = append(triples, textureTriple{gl.R16F, gl.Enum(gl.RED), gl.Enum(gl.HALF_FLOAT)}) + } + // According to the OES_texture_half_float specification, EXT_color_buffer_half_float is needed to + // render to FBOs. However, the Safari WebGL1 implementation does support half-float FBOs but does not + // report EXT_color_buffer_half_float support. The triples are verified below, so it doesn't matter if we're + // wrong. + if hasExtension(exts, "GL_OES_texture_half_float") || hasExtension(exts, "GL_EXT_color_buffer_half_float") { + // Try single channel. + triples = append(triples, textureTriple{gl.LUMINANCE, gl.Enum(gl.LUMINANCE), gl.Enum(gl.HALF_FLOAT_OES)}) + // Fallback to 4 channels. 
+ triples = append(triples, textureTriple{gl.RGBA, gl.Enum(gl.RGBA), gl.Enum(gl.HALF_FLOAT_OES)}) + } + if hasExtension(exts, "GL_OES_texture_float") || hasExtension(exts, "GL_EXT_color_buffer_float") { + triples = append(triples, textureTriple{gl.RGBA, gl.Enum(gl.RGBA), gl.Enum(gl.FLOAT)}) + } + tex := f.CreateTexture() + defer f.DeleteTexture(tex) + defTex := gl.Texture(f.GetBinding(gl.TEXTURE_BINDING_2D)) + defer f.BindTexture(gl.TEXTURE_2D, defTex) + f.BindTexture(gl.TEXTURE_2D, tex) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST) + fbo := f.CreateFramebuffer() + defer f.DeleteFramebuffer(fbo) + defFBO := gl.Framebuffer(f.GetBinding(gl.FRAMEBUFFER_BINDING)) + f.BindFramebuffer(gl.FRAMEBUFFER, fbo) + defer f.BindFramebuffer(gl.FRAMEBUFFER, defFBO) + var attempts []string + for _, tt := range triples { + const size = 256 + f.TexImage2D(gl.TEXTURE_2D, 0, tt.internalFormat, size, size, tt.format, tt.typ) + f.FramebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, tex, 0) + st := f.CheckFramebufferStatus(gl.FRAMEBUFFER) + if st == gl.FRAMEBUFFER_COMPLETE { + return tt, nil + } + attempts = append(attempts, fmt.Sprintf("(0x%x, 0x%x, 0x%x): 0x%x", tt.internalFormat, tt.format, tt.typ, st)) + } + return textureTriple{}, fmt.Errorf("floating point fbos not supported (attempted %s)", attempts) +} + +func srgbaTripleFor(ver [2]int, exts []string) (textureTriple, error) { + switch { + case ver[0] >= 3: + return textureTriple{gl.SRGB8_ALPHA8, gl.Enum(gl.RGBA), gl.Enum(gl.UNSIGNED_BYTE)}, nil + case hasExtension(exts, "GL_EXT_sRGB"): + return textureTriple{gl.SRGB_ALPHA_EXT, gl.Enum(gl.SRGB_ALPHA_EXT), gl.Enum(gl.UNSIGNED_BYTE)}, nil + default: + return textureTriple{}, errors.New("no sRGB texture formats found") + } +} + +func 
alphaTripleFor(ver [2]int) textureTriple {
+	intf, f := gl.Enum(gl.R8), gl.Enum(gl.RED)
+	if ver[0] < 3 {
+		// R8, RED not supported on OpenGL ES 2.0.
+		intf, f = gl.LUMINANCE, gl.Enum(gl.LUMINANCE)
+	}
+	return textureTriple{intf, f, gl.UNSIGNED_BYTE}
+}
+
+// hasExtension reports whether ext occurs in exts (exact string match).
+func hasExtension(exts []string, ext string) bool {
+	for _, e := range exts {
+		if ext == e {
+			return true
+		}
+	}
+	return false
+}
+
+// firstBufferType maps a buffer binding to the GL target used when
+// binding it; the first matching binding bit in the switch order below
+// wins. It panics when typ contains no supported binding.
+func firstBufferType(typ driver.BufferBinding) gl.Enum {
+	switch {
+	case typ&driver.BufferBindingIndices != 0:
+		return gl.ELEMENT_ARRAY_BUFFER
+	case typ&driver.BufferBindingVertices != 0:
+		return gl.ARRAY_BUFFER
+	case typ&driver.BufferBindingUniforms != 0:
+		return gl.UNIFORM_BUFFER
+	case typ&(driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite) != 0:
+		return gl.SHADER_STORAGE_BUFFER
+	default:
+		panic("unsupported buffer type")
+	}
+}
diff --git a/vendor/gioui.org/gpu/internal/opengl/srgb.go b/vendor/gioui.org/gpu/internal/opengl/srgb.go
new file mode 100644
index 0000000..4871d94
--- /dev/null
+++ b/vendor/gioui.org/gpu/internal/opengl/srgb.go
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+package opengl
+
+import (
+	"errors"
+	"fmt"
+	"image"
+	"runtime"
+	"strings"
+
+	"gioui.org/internal/byteslice"
+	"gioui.org/internal/gl"
+)
+
+// SRGBFBO implements an intermediate sRGB FBO
+// for gamma-correct rendering on platforms without
+// sRGB enabled native framebuffers.
+type SRGBFBO struct { + c *gl.Functions + state *glState + viewport image.Point + fbo gl.Framebuffer + tex gl.Texture + blitted bool + quad gl.Buffer + prog gl.Program + format textureTriple +} + +func NewSRGBFBO(f *gl.Functions, state *glState) (*SRGBFBO, error) { + glVer := f.GetString(gl.VERSION) + ver, _, err := gl.ParseGLVersion(glVer) + if err != nil { + return nil, err + } + exts := strings.Split(f.GetString(gl.EXTENSIONS), " ") + srgbTriple, err := srgbaTripleFor(ver, exts) + if err != nil { + // Fall back to the linear RGB colorspace, at the cost of color precision loss. + srgbTriple = textureTriple{gl.RGBA, gl.Enum(gl.RGBA), gl.Enum(gl.UNSIGNED_BYTE)} + } + s := &SRGBFBO{ + c: f, + state: state, + format: srgbTriple, + fbo: f.CreateFramebuffer(), + tex: f.CreateTexture(), + } + state.bindTexture(f, 0, s.tex) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST) + f.TexParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST) + return s, nil +} + +func (s *SRGBFBO) Blit() { + if !s.blitted { + prog, err := gl.CreateProgram(s.c, blitVSrc, blitFSrc, []string{"pos", "uv"}) + if err != nil { + panic(err) + } + s.prog = prog + s.state.useProgram(s.c, prog) + s.c.Uniform1i(s.c.GetUniformLocation(prog, "tex"), 0) + s.quad = s.c.CreateBuffer() + s.state.bindBuffer(s.c, gl.ARRAY_BUFFER, s.quad) + coords := byteslice.Slice([]float32{ + -1, +1, 0, 1, + +1, +1, 1, 1, + -1, -1, 0, 0, + +1, -1, 1, 0, + }) + s.c.BufferData(gl.ARRAY_BUFFER, len(coords), gl.STATIC_DRAW, coords) + s.blitted = true + } + s.state.useProgram(s.c, s.prog) + s.state.bindTexture(s.c, 0, s.tex) + s.state.vertexAttribPointer(s.c, s.quad, 0 /* pos */, 2, gl.FLOAT, false, 4*4, 0) + s.state.vertexAttribPointer(s.c, s.quad, 1 /* uv */, 2, gl.FLOAT, false, 4*4, 4*2) + s.state.setVertexAttribArray(s.c, 0, true) + s.state.setVertexAttribArray(s.c, 1, 
true) + s.c.DrawArrays(gl.TRIANGLE_STRIP, 0, 4) + s.state.bindFramebuffer(s.c, gl.FRAMEBUFFER, s.fbo) + s.c.InvalidateFramebuffer(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0) +} + +func (s *SRGBFBO) Framebuffer() gl.Framebuffer { + return s.fbo +} + +func (s *SRGBFBO) Refresh(viewport image.Point) error { + if viewport.X == 0 || viewport.Y == 0 { + return errors.New("srgb: zero-sized framebuffer") + } + if s.viewport == viewport { + return nil + } + s.viewport = viewport + s.state.bindTexture(s.c, 0, s.tex) + s.c.TexImage2D(gl.TEXTURE_2D, 0, s.format.internalFormat, viewport.X, viewport.Y, s.format.format, s.format.typ) + s.state.bindFramebuffer(s.c, gl.FRAMEBUFFER, s.fbo) + s.c.FramebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, s.tex, 0) + if st := s.c.CheckFramebufferStatus(gl.FRAMEBUFFER); st != gl.FRAMEBUFFER_COMPLETE { + return fmt.Errorf("sRGB framebuffer incomplete (%dx%d), status: %#x error: %x", viewport.X, viewport.Y, st, s.c.GetError()) + } + + if runtime.GOOS == "js" { + // With macOS Safari, rendering to and then reading from a SRGB8_ALPHA8 + // texture result in twice gamma corrected colors. Using a plain RGBA + // texture seems to work. 
+ s.state.setClearColor(s.c, .5, .5, .5, 1.0) + s.c.Clear(gl.COLOR_BUFFER_BIT) + var pixel [4]byte + s.c.ReadPixels(0, 0, 1, 1, gl.RGBA, gl.UNSIGNED_BYTE, pixel[:]) + if pixel[0] == 128 { // Correct sRGB color value is ~188 + s.c.TexImage2D(gl.TEXTURE_2D, 0, gl.RGBA, viewport.X, viewport.Y, gl.RGBA, gl.UNSIGNED_BYTE) + if st := s.c.CheckFramebufferStatus(gl.FRAMEBUFFER); st != gl.FRAMEBUFFER_COMPLETE { + return fmt.Errorf("fallback RGBA framebuffer incomplete (%dx%d), status: %#x error: %x", viewport.X, viewport.Y, st, s.c.GetError()) + } + } + } + + return nil +} + +func (s *SRGBFBO) Release() { + s.state.deleteFramebuffer(s.c, s.fbo) + s.state.deleteTexture(s.c, s.tex) + if s.blitted { + s.state.deleteBuffer(s.c, s.quad) + s.state.deleteProgram(s.c, s.prog) + } + s.c = nil +} + +const ( + blitVSrc = ` +#version 100 + +precision highp float; + +attribute vec2 pos; +attribute vec2 uv; + +varying vec2 vUV; + +void main() { + gl_Position = vec4(pos, 0, 1); + vUV = uv; +} +` + blitFSrc = ` +#version 100 + +precision mediump float; + +uniform sampler2D tex; +varying vec2 vUV; + +vec3 gamma(vec3 rgb) { + vec3 exp = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055); + vec3 lin = rgb * vec3(12.92); + bvec3 cut = lessThan(rgb, vec3(0.0031308)); + return vec3(cut.r ? lin.r : exp.r, cut.g ? lin.g : exp.g, cut.b ? 
lin.b : exp.b); +} + +void main() { + vec4 col = texture2D(tex, vUV); + vec3 rgb = col.rgb; + rgb = gamma(rgb); + gl_FragColor = vec4(rgb, col.a); +} +` +) diff --git a/vendor/gioui.org/gpu/internal/vulkan/vulkan.go b/vendor/gioui.org/gpu/internal/vulkan/vulkan.go new file mode 100644 index 0000000..7d3791e --- /dev/null +++ b/vendor/gioui.org/gpu/internal/vulkan/vulkan.go @@ -0,0 +1,1121 @@ +// SPDX-License-Identifier: Unlicense OR MIT + +//go:build (linux || freebsd) && !novulkan +// +build linux freebsd +// +build !novulkan + +package vulkan + +import ( + "errors" + "fmt" + "image" + + "gioui.org/gpu/internal/driver" + "gioui.org/internal/vk" + "gioui.org/shader" +) + +type Backend struct { + physDev vk.PhysicalDevice + dev vk.Device + queue vk.Queue + cmdPool struct { + current vk.CommandBuffer + pool vk.CommandPool + used int + buffers []vk.CommandBuffer + } + outFormat vk.Format + staging struct { + buf *Buffer + mem []byte + size int + cap int + } + defers []func(d vk.Device) + frameSig vk.Semaphore + waitSems []vk.Semaphore + waitStages []vk.PipelineStageFlags + sigSems []vk.Semaphore + fence vk.Fence + + allPipes []*Pipeline + + pipe *Pipeline + + passes map[passKey]vk.RenderPass + + // bindings and offset are temporary storage for BindVertexBuffer. 
+ bindings []vk.Buffer + offsets []vk.DeviceSize + + desc struct { + dirty bool + texBinds [texUnits]*Texture + bufBinds [storageUnits]*Buffer + } + + caps driver.Features +} + +type passKey struct { + fmt vk.Format + loadAct vk.AttachmentLoadOp + initLayout vk.ImageLayout + finalLayout vk.ImageLayout +} + +type Texture struct { + backend *Backend + img vk.Image + mem vk.DeviceMemory + view vk.ImageView + sampler vk.Sampler + fbo vk.Framebuffer + format vk.Format + layout vk.ImageLayout + passLayout vk.ImageLayout + width int + height int + acquire vk.Semaphore + foreign bool + + scope struct { + stage vk.PipelineStageFlags + access vk.AccessFlags + } +} + +type Shader struct { + dev vk.Device + module vk.ShaderModule + pushRange vk.PushConstantRange + src shader.Sources +} + +type Pipeline struct { + backend *Backend + pipe vk.Pipeline + pushRanges []vk.PushConstantRange + ninputs int + desc *descPool +} + +type descPool struct { + layout vk.PipelineLayout + descLayout vk.DescriptorSetLayout + pool vk.DescriptorPool + size int + cap int + texBinds []int + imgBinds []int + bufBinds []int +} + +type Buffer struct { + backend *Backend + buf vk.Buffer + store []byte + mem vk.DeviceMemory + usage vk.BufferUsageFlags + + scope struct { + stage vk.PipelineStageFlags + access vk.AccessFlags + } +} + +const ( + texUnits = 4 + storageUnits = 4 +) + +func init() { + driver.NewVulkanDevice = newVulkanDevice +} + +func newVulkanDevice(api driver.Vulkan) (driver.Device, error) { + b := &Backend{ + physDev: vk.PhysicalDevice(api.PhysDevice), + dev: vk.Device(api.Device), + outFormat: vk.Format(api.Format), + caps: driver.FeatureCompute, + passes: make(map[passKey]vk.RenderPass), + } + b.queue = vk.GetDeviceQueue(b.dev, api.QueueFamily, api.QueueIndex) + cmdPool, err := vk.CreateCommandPool(b.dev, api.QueueFamily) + if err != nil { + return nil, err + } + b.cmdPool.pool = cmdPool + props := vk.GetPhysicalDeviceFormatProperties(b.physDev, vk.FORMAT_R16_SFLOAT) + reqs := 
vk.FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | vk.FORMAT_FEATURE_SAMPLED_IMAGE_BIT + if props&reqs == reqs { + b.caps |= driver.FeatureFloatRenderTargets + } + reqs = vk.FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT | vk.FORMAT_FEATURE_SAMPLED_IMAGE_BIT + props = vk.GetPhysicalDeviceFormatProperties(b.physDev, vk.FORMAT_R8G8B8A8_SRGB) + if props&reqs == reqs { + b.caps |= driver.FeatureSRGB + } + fence, err := vk.CreateFence(b.dev) + if err != nil { + return nil, mapErr(err) + } + b.fence = fence + return b, nil +} + +func (b *Backend) BeginFrame(target driver.RenderTarget, clear bool, viewport image.Point) driver.Texture { + vk.QueueWaitIdle(b.queue) + b.staging.size = 0 + b.cmdPool.used = 0 + b.runDefers() + b.resetPipes() + + if target == nil { + return nil + } + switch t := target.(type) { + case driver.VulkanRenderTarget: + layout := vk.IMAGE_LAYOUT_UNDEFINED + if !clear { + layout = vk.IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + } + b.frameSig = vk.Semaphore(t.SignalSem) + tex := &Texture{ + img: vk.Image(t.Image), + fbo: vk.Framebuffer(t.Framebuffer), + width: viewport.X, + height: viewport.Y, + layout: layout, + passLayout: vk.IMAGE_LAYOUT_PRESENT_SRC_KHR, + format: b.outFormat, + acquire: vk.Semaphore(t.WaitSem), + foreign: true, + } + return tex + case *Texture: + return t + default: + panic(fmt.Sprintf("vulkan: unsupported render target type: %T", t)) + } +} + +func (b *Backend) deferFunc(f func(d vk.Device)) { + b.defers = append(b.defers, f) +} + +func (b *Backend) runDefers() { + for _, f := range b.defers { + f(b.dev) + } + b.defers = b.defers[:0] +} + +func (b *Backend) resetPipes() { + for i := len(b.allPipes) - 1; i >= 0; i-- { + p := b.allPipes[i] + if p.pipe == 0 { + // Released pipeline. + b.allPipes = append(b.allPipes[:i], b.allPipes[:i+1]...) 
+ continue + } + if p.desc.size > 0 { + vk.ResetDescriptorPool(b.dev, p.desc.pool) + p.desc.size = 0 + } + } +} + +func (b *Backend) EndFrame() { + if b.frameSig != 0 { + b.sigSems = append(b.sigSems, b.frameSig) + b.frameSig = 0 + } + b.submitCmdBuf(false) +} + +func (b *Backend) Caps() driver.Caps { + return driver.Caps{ + MaxTextureSize: 4096, + Features: b.caps, + } +} + +func (b *Backend) NewTimer() driver.Timer { + panic("timers not supported") +} + +func (b *Backend) IsTimeContinuous() bool { + panic("timers not supported") +} + +func (b *Backend) Release() { + vk.DeviceWaitIdle(b.dev) + if buf := b.staging.buf; buf != nil { + vk.UnmapMemory(b.dev, b.staging.buf.mem) + buf.Release() + } + b.runDefers() + for _, rp := range b.passes { + vk.DestroyRenderPass(b.dev, rp) + } + vk.DestroyFence(b.dev, b.fence) + vk.FreeCommandBuffers(b.dev, b.cmdPool.pool, b.cmdPool.buffers...) + vk.DestroyCommandPool(b.dev, b.cmdPool.pool) + *b = Backend{} +} + +func (b *Backend) NewTexture(format driver.TextureFormat, width, height int, minFilter, magFilter driver.TextureFilter, bindings driver.BufferBinding) (driver.Texture, error) { + vkfmt := formatFor(format) + usage := vk.IMAGE_USAGE_TRANSFER_DST_BIT | vk.IMAGE_USAGE_TRANSFER_SRC_BIT + passLayout := vk.IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + if bindings&driver.BufferBindingTexture != 0 { + usage |= vk.IMAGE_USAGE_SAMPLED_BIT + passLayout = vk.IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL + } + if bindings&driver.BufferBindingFramebuffer != 0 { + usage |= vk.IMAGE_USAGE_COLOR_ATTACHMENT_BIT + } + if bindings&(driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite) != 0 { + usage |= vk.IMAGE_USAGE_STORAGE_BIT + } + filterFor := func(f driver.TextureFilter) vk.Filter { + switch minFilter { + case driver.FilterLinear: + return vk.FILTER_LINEAR + case driver.FilterNearest: + return vk.FILTER_NEAREST + } + panic("unknown filter") + } + sampler, err := vk.CreateSampler(b.dev, filterFor(minFilter), 
filterFor(magFilter)) + if err != nil { + return nil, mapErr(err) + } + img, mem, err := vk.CreateImage(b.physDev, b.dev, vkfmt, width, height, usage) + if err != nil { + vk.DestroySampler(b.dev, sampler) + return nil, mapErr(err) + } + view, err := vk.CreateImageView(b.dev, img, vkfmt) + if err != nil { + vk.DestroySampler(b.dev, sampler) + vk.DestroyImage(b.dev, img) + vk.FreeMemory(b.dev, mem) + return nil, mapErr(err) + } + t := &Texture{backend: b, img: img, mem: mem, view: view, sampler: sampler, layout: vk.IMAGE_LAYOUT_UNDEFINED, passLayout: passLayout, width: width, height: height, format: vkfmt} + if bindings&driver.BufferBindingFramebuffer != 0 { + pass, err := vk.CreateRenderPass(b.dev, vkfmt, vk.ATTACHMENT_LOAD_OP_DONT_CARE, + vk.IMAGE_LAYOUT_UNDEFINED, vk.IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, nil) + if err != nil { + return nil, mapErr(err) + } + defer vk.DestroyRenderPass(b.dev, pass) + fbo, err := vk.CreateFramebuffer(b.dev, pass, view, width, height) + if err != nil { + return nil, mapErr(err) + } + t.fbo = fbo + } + return t, nil +} + +func (b *Backend) NewBuffer(bindings driver.BufferBinding, size int) (driver.Buffer, error) { + if bindings&driver.BufferBindingUniforms != 0 { + // Implement uniform buffers as inline push constants. 
+ return &Buffer{store: make([]byte, size)}, nil + } + usage := vk.BUFFER_USAGE_TRANSFER_DST_BIT | vk.BUFFER_USAGE_TRANSFER_SRC_BIT + if bindings&driver.BufferBindingIndices != 0 { + usage |= vk.BUFFER_USAGE_INDEX_BUFFER_BIT + } + if bindings&(driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite) != 0 { + usage |= vk.BUFFER_USAGE_STORAGE_BUFFER_BIT + } + if bindings&driver.BufferBindingVertices != 0 { + usage |= vk.BUFFER_USAGE_VERTEX_BUFFER_BIT + } + buf, err := b.newBuffer(size, usage, vk.MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + return buf, mapErr(err) +} + +func (b *Backend) newBuffer(size int, usage vk.BufferUsageFlags, props vk.MemoryPropertyFlags) (*Buffer, error) { + buf, mem, err := vk.CreateBuffer(b.physDev, b.dev, size, usage, props) + return &Buffer{backend: b, buf: buf, mem: mem, usage: usage}, err +} + +func (b *Backend) NewImmutableBuffer(typ driver.BufferBinding, data []byte) (driver.Buffer, error) { + buf, err := b.NewBuffer(typ, len(data)) + if err != nil { + return nil, err + } + buf.Upload(data) + return buf, nil +} + +func (b *Backend) NewVertexShader(src shader.Sources) (driver.VertexShader, error) { + sh, err := b.newShader(src, vk.SHADER_STAGE_VERTEX_BIT) + return sh, mapErr(err) +} + +func (b *Backend) NewFragmentShader(src shader.Sources) (driver.FragmentShader, error) { + sh, err := b.newShader(src, vk.SHADER_STAGE_FRAGMENT_BIT) + return sh, mapErr(err) +} + +func (b *Backend) NewPipeline(desc driver.PipelineDesc) (driver.Pipeline, error) { + vs := desc.VertexShader.(*Shader) + fs := desc.FragmentShader.(*Shader) + var ranges []vk.PushConstantRange + if r := vs.pushRange; r != (vk.PushConstantRange{}) { + ranges = append(ranges, r) + } + if r := fs.pushRange; r != (vk.PushConstantRange{}) { + ranges = append(ranges, r) + } + descPool, err := createPipelineLayout(b.dev, fs.src, ranges) + if err != nil { + return nil, mapErr(err) + } + blend := desc.BlendDesc + factorFor := func(f driver.BlendFactor) vk.BlendFactor { + 
switch f { + case driver.BlendFactorZero: + return vk.BLEND_FACTOR_ZERO + case driver.BlendFactorOne: + return vk.BLEND_FACTOR_ONE + case driver.BlendFactorOneMinusSrcAlpha: + return vk.BLEND_FACTOR_ONE_MINUS_SRC_ALPHA + case driver.BlendFactorDstColor: + return vk.BLEND_FACTOR_DST_COLOR + default: + panic("unknown blend factor") + } + } + var top vk.PrimitiveTopology + switch desc.Topology { + case driver.TopologyTriangles: + top = vk.PRIMITIVE_TOPOLOGY_TRIANGLE_LIST + case driver.TopologyTriangleStrip: + top = vk.PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP + default: + panic("unknown topology") + } + var binds []vk.VertexInputBindingDescription + var attrs []vk.VertexInputAttributeDescription + inputs := desc.VertexLayout.Inputs + for i, inp := range inputs { + binds = append(binds, vk.VertexInputBindingDescription{ + Binding: i, + Stride: desc.VertexLayout.Stride, + }) + attrs = append(attrs, vk.VertexInputAttributeDescription{ + Binding: i, + Location: vs.src.Inputs[i].Location, + Format: vertFormatFor(vs.src.Inputs[i]), + Offset: inp.Offset, + }) + } + fmt := b.outFormat + if f := desc.PixelFormat; f != driver.TextureFormatOutput { + fmt = formatFor(f) + } + pass, err := vk.CreateRenderPass(b.dev, fmt, vk.ATTACHMENT_LOAD_OP_DONT_CARE, + vk.IMAGE_LAYOUT_UNDEFINED, vk.IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, nil) + if err != nil { + return nil, mapErr(err) + } + defer vk.DestroyRenderPass(b.dev, pass) + pipe, err := vk.CreateGraphicsPipeline(b.dev, pass, vs.module, fs.module, blend.Enable, factorFor(blend.SrcFactor), factorFor(blend.DstFactor), top, binds, attrs, descPool.layout) + if err != nil { + descPool.release(b.dev) + return nil, mapErr(err) + } + p := &Pipeline{backend: b, pipe: pipe, desc: descPool, pushRanges: ranges, ninputs: len(inputs)} + b.allPipes = append(b.allPipes, p) + return p, nil +} + +func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) { + sh, err := b.newShader(src, vk.SHADER_STAGE_COMPUTE_BIT) + if err != nil { + return 
nil, mapErr(err) + } + defer sh.Release() + descPool, err := createPipelineLayout(b.dev, src, nil) + if err != nil { + return nil, mapErr(err) + } + pipe, err := vk.CreateComputePipeline(b.dev, sh.module, descPool.layout) + if err != nil { + descPool.release(b.dev) + return nil, mapErr(err) + } + return &Pipeline{backend: b, pipe: pipe, desc: descPool}, nil +} + +func vertFormatFor(f shader.InputLocation) vk.Format { + t := f.Type + s := f.Size + switch { + case t == shader.DataTypeFloat && s == 1: + return vk.FORMAT_R32_SFLOAT + case t == shader.DataTypeFloat && s == 2: + return vk.FORMAT_R32G32_SFLOAT + case t == shader.DataTypeFloat && s == 3: + return vk.FORMAT_R32G32B32_SFLOAT + case t == shader.DataTypeFloat && s == 4: + return vk.FORMAT_R32G32B32A32_SFLOAT + default: + panic("unsupported data type") + } +} + +func createPipelineLayout(d vk.Device, src shader.Sources, ranges []vk.PushConstantRange) (*descPool, error) { + var ( + descLayouts []vk.DescriptorSetLayout + descLayout vk.DescriptorSetLayout + ) + texBinds := make([]int, len(src.Textures)) + imgBinds := make([]int, len(src.Images)) + bufBinds := make([]int, len(src.StorageBuffers)) + var descBinds []vk.DescriptorSetLayoutBinding + for i, t := range src.Textures { + descBinds = append(descBinds, vk.DescriptorSetLayoutBinding{ + Binding: t.Binding, + StageFlags: vk.SHADER_STAGE_FRAGMENT_BIT, + DescriptorType: vk.DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + }) + texBinds[i] = t.Binding + } + for i, img := range src.Images { + descBinds = append(descBinds, vk.DescriptorSetLayoutBinding{ + Binding: img.Binding, + StageFlags: vk.SHADER_STAGE_COMPUTE_BIT, + DescriptorType: vk.DESCRIPTOR_TYPE_STORAGE_IMAGE, + }) + imgBinds[i] = img.Binding + } + for i, buf := range src.StorageBuffers { + descBinds = append(descBinds, vk.DescriptorSetLayoutBinding{ + Binding: buf.Binding, + StageFlags: vk.SHADER_STAGE_COMPUTE_BIT, + DescriptorType: vk.DESCRIPTOR_TYPE_STORAGE_BUFFER, + }) + bufBinds[i] = buf.Binding + } + if 
len(descBinds) > 0 {
	var err error
	descLayout, err = vk.CreateDescriptorSetLayout(d, descBinds)
	if err != nil {
		return nil, err
	}
	descLayouts = append(descLayouts, descLayout)
}
layout, err := vk.CreatePipelineLayout(d, ranges, descLayouts)
if err != nil {
	// Don't leak the descriptor set layout if pipeline layout creation fails.
	if descLayout != 0 {
		vk.DestroyDescriptorSetLayout(d, descLayout)
	}
	return nil, err
}
descPool := &descPool{
	texBinds:   texBinds,
	bufBinds:   bufBinds,
	imgBinds:   imgBinds,
	layout:     layout,
	descLayout: descLayout,
}
return descPool, nil
}

// newShader wraps the SPIR-V code in src in a Vulkan shader module for the
// given stage. If the shader declares uniforms, a push-constant range
// covering them is recorded on the returned Shader for use at pipeline
// creation and in BindUniforms.
func (b *Backend) newShader(src shader.Sources, stage vk.ShaderStageFlags) (*Shader, error) {
	mod, err := vk.CreateShaderModule(b.dev, src.SPIRV)
	if err != nil {
		return nil, err
	}

	sh := &Shader{dev: b.dev, module: mod, src: src}
	if locs := src.Uniforms.Locations; len(locs) > 0 {
		// The push-constant range starts at the smallest uniform offset.
		pushOffset := 0x7fffffff
		for _, l := range locs {
			if l.Offset < pushOffset {
				pushOffset = l.Offset
			}
		}
		sh.pushRange = vk.BuildPushConstantRange(stage, pushOffset, src.Uniforms.Size)
	}
	return sh, nil
}

// CopyTexture copies the region srect of srcFBO to dstTex at offset dorig.
// Both images are transitioned to their respective transfer layouts before
// the GPU-side copy is recorded.
func (b *Backend) CopyTexture(dstTex driver.Texture, dorig image.Point, srcFBO driver.Texture, srect image.Rectangle) {
	dst := dstTex.(*Texture)
	src := srcFBO.(*Texture)
	cmdBuf := b.ensureCmdBuf()
	op := vk.BuildImageCopy(srect.Min.X, srect.Min.Y, dorig.X, dorig.Y, srect.Dx(), srect.Dy())
	src.imageBarrier(cmdBuf,
		vk.IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
		vk.PIPELINE_STAGE_TRANSFER_BIT,
		vk.ACCESS_TRANSFER_READ_BIT,
	)
	dst.imageBarrier(cmdBuf,
		vk.IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		vk.PIPELINE_STAGE_TRANSFER_BIT,
		vk.ACCESS_TRANSFER_WRITE_BIT,
	)
	vk.CmdCopyImage(cmdBuf, src.img, src.layout, dst.img, dst.layout, []vk.ImageCopy{op})
}

// Viewport sets the viewport for subsequent draws in the current pass.
func (b *Backend) Viewport(x, y, width, height int) {
	cmdBuf := b.currentCmdBuf()
	vp := vk.BuildViewport(float32(x), float32(y), float32(width), float32(height))
	vk.CmdSetViewport(cmdBuf, 0, vp)
}

// DrawArrays records a non-indexed draw of count vertices starting at off,
// first flushing any dirty texture/buffer bindings into a fresh graphics
// descriptor set.
func (b *Backend) DrawArrays(off, count int) {
	cmdBuf := b.currentCmdBuf()
	if b.desc.dirty {
		b.pipe.desc.bindDescriptorSet(b, cmdBuf, vk.PIPELINE_BIND_POINT_GRAPHICS, b.desc.texBinds, b.desc.bufBinds)
		b.desc.dirty = false
	}
	vk.CmdDraw(cmdBuf, count, 1, off, 0)
}

// DrawElements records an indexed draw of count indices starting at index
// off, flushing dirty bindings as in DrawArrays.
func (b *Backend) DrawElements(off, count int) {
	cmdBuf := b.currentCmdBuf()
	if b.desc.dirty {
		b.pipe.desc.bindDescriptorSet(b, cmdBuf, vk.PIPELINE_BIND_POINT_GRAPHICS, b.desc.texBinds, b.desc.bufBinds)
		b.desc.dirty = false
	}
	vk.CmdDrawIndexed(cmdBuf, count, 1, off, 0, 0)
}

// BindImageTexture binds tex as a storage image at the given unit for
// compute, transitioning it to GENERAL layout with read/write shader access.
// The descriptor set is marked dirty and rebuilt at the next dispatch.
func (b *Backend) BindImageTexture(unit int, tex driver.Texture) {
	t := tex.(*Texture)
	b.desc.texBinds[unit] = t
	b.desc.dirty = true
	t.imageBarrier(b.currentCmdBuf(),
		vk.IMAGE_LAYOUT_GENERAL,
		vk.PIPELINE_STAGE_COMPUTE_SHADER_BIT,
		vk.ACCESS_SHADER_READ_BIT|vk.ACCESS_SHADER_WRITE_BIT,
	)
}

// DispatchCompute records a compute dispatch of x×y×z workgroups, flushing
// dirty bindings into a compute descriptor set first.
func (b *Backend) DispatchCompute(x, y, z int) {
	cmdBuf := b.currentCmdBuf()
	if b.desc.dirty {
		b.pipe.desc.bindDescriptorSet(b, cmdBuf, vk.PIPELINE_BIND_POINT_COMPUTE, b.desc.texBinds, b.desc.bufBinds)
		b.desc.dirty = false
	}
	vk.CmdDispatch(cmdBuf, x, y, z)
}

// Upload copies size pixels from the CPU-side pixels (rows stride bytes
// apart; 0 means tightly packed at 4 bytes per pixel) into the texture at
// offset, staging the data through the host-visible staging buffer and
// recording a buffer-to-image copy.
func (t *Texture) Upload(offset, size image.Point, pixels []byte, stride int) {
	if stride == 0 {
		stride = size.X * 4
	}
	cmdBuf := t.backend.ensureCmdBuf()
	dstStride := size.X * 4
	n := size.Y * dstStride
	stage, mem, off := t.backend.stagingBuffer(n)
	// Repack rows tightly into the staging buffer; the source may have a
	// wider stride than the destination.
	var srcOff, dstOff int
	for y := 0; y < size.Y; y++ {
		srcRow := pixels[srcOff : srcOff+dstStride]
		dstRow := mem[dstOff : dstOff+dstStride]
		copy(dstRow, srcRow)
		dstOff += dstStride
		srcOff += stride
	}
	// Row length is in pixels (4 bytes each), hence dstStride/4.
	op := vk.BuildBufferImageCopy(off, dstStride/4, offset.X, offset.Y, size.X, size.Y)
	t.imageBarrier(cmdBuf,
		vk.IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		vk.PIPELINE_STAGE_TRANSFER_BIT,
		vk.ACCESS_TRANSFER_WRITE_BIT,
	)
	vk.CmdCopyBufferToImage(cmdBuf, stage.buf, t.img, t.layout, op)
}

// Release schedules destruction of the texture's Vulkan objects via the
// backend's deferFunc (presumably deferred until the device is done with
// them) and zeroes the receiver. Foreign (externally owned) textures must
// not be released through this path.
func (t *Texture) Release() {
	if t.foreign {
		panic("external textures cannot be released")
	}
	// Copy the fields; the closure must not observe the zeroed receiver.
	freet := *t
	t.backend.deferFunc(func(d vk.Device) {
		if freet.fbo != 0 {
			vk.DestroyFramebuffer(d, freet.fbo)
		}
		vk.DestroySampler(d, freet.sampler)
		vk.DestroyImageView(d, freet.view)
		vk.DestroyImage(d, freet.img)
		vk.FreeMemory(d, freet.mem)
	})
	*t = Texture{}
}

// Release schedules destruction of the pipeline and its descriptor pool
// state, then zeroes the receiver.
func (p *Pipeline) Release() {
	freep := *p
	p.backend.deferFunc(func(d vk.Device) {
		freep.desc.release(d)
		vk.DestroyPipeline(d, freep.pipe)
	})
	*p = Pipeline{}
}

// release destroys the pool's descriptor pool, descriptor set layout and
// pipeline layout, where present.
func (p *descPool) release(d vk.Device) {
	if p := p.pool; p != 0 {
		vk.DestroyDescriptorPool(d, p)
	}
	if l := p.descLayout; l != 0 {
		vk.DestroyDescriptorSetLayout(d, l)
	}
	vk.DestroyPipelineLayout(d, p.layout)
}

// bindDescriptorSet allocates a descriptor set from the pool, writes the
// current texture, storage-image and storage-buffer bindings into it, and
// binds it on cmdBuf. When the pool is exhausted it is replaced by a new
// one with doubled capacity (deferring destruction of the old pool), and
// allocation is retried; a second consecutive failure panics.
func (p *descPool) bindDescriptorSet(b *Backend, cmdBuf vk.CommandBuffer, bindPoint vk.PipelineBindPoint, texBinds [texUnits]*Texture, bufBinds [storageUnits]*Buffer) {
	realloced := false
	destroyPool := func() {
		if pool := p.pool; pool != 0 {
			// The old pool may still back sets referenced by in-flight
			// command buffers, so destruction is deferred.
			b.deferFunc(func(d vk.Device) {
				vk.DestroyDescriptorPool(d, pool)
			})
		}
		p.pool = 0
		p.cap = 0
	}
	for {
		if p.size == p.cap {
			if realloced {
				panic("vulkan: vkAllocateDescriptorSet failed on a newly allocated descriptor pool")
			}
			destroyPool()
			realloced = true
			newCap := p.cap * 2
			const initialPoolSize = 100
			if newCap < initialPoolSize {
				newCap = initialPoolSize
			}
			// Size the pool per descriptor type, scaled by the number of
			// bindings of that type used by the pipeline.
			var poolSizes []vk.DescriptorPoolSize
			if n := len(p.texBinds); n > 0 {
				poolSizes = append(poolSizes, vk.BuildDescriptorPoolSize(vk.DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, newCap*n))
			}
			if n := len(p.imgBinds); n > 0 {
				poolSizes = append(poolSizes, vk.BuildDescriptorPoolSize(vk.DESCRIPTOR_TYPE_STORAGE_IMAGE, newCap*n))
			}
			if n := len(p.bufBinds); n > 0 {
				poolSizes = append(poolSizes, vk.BuildDescriptorPoolSize(vk.DESCRIPTOR_TYPE_STORAGE_BUFFER, newCap*n))
			}
			pool, err := vk.CreateDescriptorPool(b.dev, newCap, poolSizes)
			if err != nil {
				panic(fmt.Errorf("vulkan: failed to allocate descriptor pool with %d descriptors", newCap))
			}
			p.pool = pool
			p.cap = newCap
			p.size = 0
		}
		l := p.descLayout
		if l == 0 {
			panic("vulkan: descriptor set is dirty, but pipeline has empty layout")
		}
		descSet, err := vk.AllocateDescriptorSet(b.dev, p.pool, l)
		if err != nil {
			// Allocation can fail even when size < cap (e.g. pool
			// fragmentation); replace the pool and retry once.
			destroyPool()
			continue
		}
		p.size++
		for _, bind := range p.texBinds {
			tex := texBinds[bind]
			write := vk.BuildWriteDescriptorSetImage(descSet, bind, vk.DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, tex.sampler, tex.view, vk.IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
			vk.UpdateDescriptorSet(b.dev, write)
		}
		for _, bind := range p.imgBinds {
			tex := texBinds[bind]
			write := vk.BuildWriteDescriptorSetImage(descSet, bind, vk.DESCRIPTOR_TYPE_STORAGE_IMAGE, 0, tex.view, vk.IMAGE_LAYOUT_GENERAL)
			vk.UpdateDescriptorSet(b.dev, write)
		}
		for _, bind := range p.bufBinds {
			buf := bufBinds[bind]
			write := vk.BuildWriteDescriptorSetBuffer(descSet, bind, vk.DESCRIPTOR_TYPE_STORAGE_BUFFER, buf.buf)
			vk.UpdateDescriptorSet(b.dev, write)
		}
		vk.CmdBindDescriptorSets(cmdBuf, bindPoint, p.layout, 0, []vk.DescriptorSet{descSet})
		break
	}
}

// imageBarrier records a pipeline barrier transitioning the image to layout
// and making it visible to the given stage/access, tracking the source
// scope from the previous barrier. If no scope is pending and the layout is
// already correct, no barrier is recorded — the requested scope is merely
// remembered as the source of the next barrier.
func (t *Texture) imageBarrier(cmdBuf vk.CommandBuffer, layout vk.ImageLayout, stage vk.PipelineStageFlags, access vk.AccessFlags) {
	srcStage := t.scope.stage
	if srcStage == 0 && t.layout == layout {
		t.scope.stage = stage
		t.scope.access = access
		return
	}
	if srcStage == 0 {
		srcStage = vk.PIPELINE_STAGE_TOP_OF_PIPE_BIT
	}
	b := vk.BuildImageMemoryBarrier(
		t.img,
		t.scope.access, access,
		t.layout, layout,
	)
	vk.CmdPipelineBarrier(cmdBuf, srcStage, stage, vk.DEPENDENCY_BY_REGION_BIT, nil, nil, []vk.ImageMemoryBarrier{b})
	t.layout = layout
	t.scope.stage = stage
	t.scope.access = access
}

// PrepareTexture transitions tex to SHADER_READ_ONLY_OPTIMAL for sampling
// in fragment shaders.
func (b *Backend) PrepareTexture(tex driver.Texture) {
	t := tex.(*Texture)
	cmdBuf := b.ensureCmdBuf()
	t.imageBarrier(cmdBuf,
		vk.IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		vk.PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
		vk.ACCESS_SHADER_READ_BIT,
	)
}

// BindTexture stages tex at the given sampler unit; the descriptor set is
// rebuilt lazily at the next draw or dispatch.
func (b *Backend) BindTexture(unit int, tex driver.Texture) {
	t := tex.(*Texture)
	b.desc.texBinds[unit] = t
	b.desc.dirty = true
}

// BindPipeline binds a graphics pipeline.
func (b *Backend) BindPipeline(pipe driver.Pipeline) {
	b.bindPipeline(pipe.(*Pipeline), vk.PIPELINE_BIND_POINT_GRAPHICS)
}

// BindProgram binds a compute pipeline.
func (b *Backend) BindProgram(prog driver.Program) {
	b.bindPipeline(prog.(*Pipeline), vk.PIPELINE_BIND_POINT_COMPUTE)
}

// bindPipeline makes p current and marks the descriptor state dirty when
// the pipeline actually uses descriptors.
func (b *Backend) bindPipeline(p *Pipeline, point vk.PipelineBindPoint) {
	b.pipe = p
	b.desc.dirty = p.desc.descLayout != 0
	cmdBuf := b.currentCmdBuf()
	vk.CmdBindPipeline(cmdBuf, point, p.pipe)
}

// Release destroys the shader module and zeroes the receiver.
func (s *Shader) Release() {
	vk.DestroyShaderModule(s.dev, s.module)
	*s = Shader{}
}

// BindStorageBuffer stages buffer at the given storage binding and inserts
// a barrier making it read/write visible to compute shaders.
func (b *Backend) BindStorageBuffer(binding int, buffer driver.Buffer) {
	buf := buffer.(*Buffer)
	b.desc.bufBinds[binding] = buf
	b.desc.dirty = true
	buf.barrier(b.currentCmdBuf(),
		vk.PIPELINE_STAGE_COMPUTE_SHADER_BIT,
		vk.ACCESS_SHADER_READ_BIT|vk.ACCESS_SHADER_WRITE_BIT,
	)
}

// BindUniforms pushes the uniform data from buffer's CPU-side store through
// the current pipeline's push-constant ranges.
func (b *Backend) BindUniforms(buffer driver.Buffer) {
	buf := buffer.(*Buffer)
	cmdBuf := b.currentCmdBuf()
	for _, s := range b.pipe.pushRanges {
		off := s.Offset()
		vk.CmdPushConstants(cmdBuf, b.pipe.desc.layout, s.StageFlags(), off, buf.store[off:off+s.Size()])
	}
}

// BindVertexBuffer binds buffer (at byte offset) to every vertex input of
// the current pipeline. The scratch bindings/offsets slices are reused to
// avoid per-call allocations.
func (b *Backend) BindVertexBuffer(buffer driver.Buffer, offset int) {
	buf := buffer.(*Buffer)
	cmdBuf := b.currentCmdBuf()
	b.bindings = b.bindings[:0]
	b.offsets = b.offsets[:0]
	for i := 0; i < b.pipe.ninputs; i++ {
		b.bindings = append(b.bindings, buf.buf)
		b.offsets = append(b.offsets, vk.DeviceSize(offset))
	}
	vk.CmdBindVertexBuffers(cmdBuf, 0, b.bindings, b.offsets)
}

// BindIndexBuffer binds buffer as the 16-bit index buffer.
func (b *Backend) BindIndexBuffer(buffer driver.Buffer) {
	buf := buffer.(*Buffer)
	cmdBuf := b.currentCmdBuf()
	vk.CmdBindIndexBuffer(cmdBuf, buf.buf, 0, vk.INDEX_TYPE_UINT16)
}

// Download reads the buffer's contents into data. CPU-only buffers (no
// Vulkan buffer) are copied directly from the store; otherwise the contents
// are copied to the staging buffer on the GPU, the command buffer is
// submitted synchronously, and the result copied out.
func (b *Buffer) Download(data []byte) error {
	if b.buf == 0 {
		copy(data, b.store)
		return nil
	}
	stage, mem, off := b.backend.stagingBuffer(len(data))
	cmdBuf := b.backend.ensureCmdBuf()
	b.barrier(cmdBuf,
		vk.PIPELINE_STAGE_TRANSFER_BIT,
		vk.ACCESS_TRANSFER_READ_BIT,
	)
	vk.CmdCopyBuffer(cmdBuf, b.buf, stage.buf, 0, off, len(data))
	stage.scope.stage = vk.PIPELINE_STAGE_TRANSFER_BIT
	stage.scope.access = vk.ACCESS_TRANSFER_WRITE_BIT
	// Make the transfer result visible to host reads before we block.
	stage.barrier(cmdBuf,
		vk.PIPELINE_STAGE_HOST_BIT,
		vk.ACCESS_HOST_READ_BIT,
	)
	b.backend.submitCmdBuf(true)
	copy(data, mem)
	return nil
}

// Upload writes data into the buffer. CPU-only buffers are written
// directly; otherwise data goes through the staging buffer and a GPU copy,
// followed by a barrier covering index/vertex reads when the buffer is
// used as such.
func (b *Buffer) Upload(data []byte) {
	if b.buf == 0 {
		copy(b.store, data)
		return
	}
	stage, mem, off := b.backend.stagingBuffer(len(data))
	copy(mem, data)
	cmdBuf := b.backend.ensureCmdBuf()
	b.barrier(cmdBuf,
		vk.PIPELINE_STAGE_TRANSFER_BIT,
		vk.ACCESS_TRANSFER_WRITE_BIT,
	)
	vk.CmdCopyBuffer(cmdBuf, stage.buf, b.buf, off, 0, len(data))
	var access vk.AccessFlags
	if b.usage&vk.BUFFER_USAGE_INDEX_BUFFER_BIT != 0 {
		access |= vk.ACCESS_INDEX_READ_BIT
	}
	if b.usage&vk.BUFFER_USAGE_VERTEX_BUFFER_BIT != 0 {
		access |= vk.ACCESS_VERTEX_ATTRIBUTE_READ_BIT
	}
	if access != 0 {
		b.barrier(cmdBuf,
			vk.PIPELINE_STAGE_VERTEX_INPUT_BIT,
			access,
		)
	}
}

// barrier records a buffer memory barrier from the buffer's tracked source
// scope to the given destination stage/access. With no pending source
// scope, the request is only recorded as the source of the next barrier.
func (b *Buffer) barrier(cmdBuf vk.CommandBuffer, stage vk.PipelineStageFlags, access vk.AccessFlags) {
	srcStage := b.scope.stage
	if srcStage == 0 {
		b.scope.stage = stage
		b.scope.access = access
		return
	}
	barrier := vk.BuildBufferMemoryBarrier(
		b.buf,
		b.scope.access, access,
	)
	vk.CmdPipelineBarrier(cmdBuf, srcStage, stage, vk.DEPENDENCY_BY_REGION_BIT, nil, []vk.BufferMemoryBarrier{barrier}, nil)
	b.scope.stage = stage
	b.scope.access = access
}

// Release schedules destruction of the Vulkan buffer and its memory (when
// present) and zeroes the receiver.
func (b *Buffer) Release() {
	freeb := *b
	if freeb.buf != 0 {
		b.backend.deferFunc(func(d vk.Device) {
			vk.DestroyBuffer(d, freeb.buf)
			vk.FreeMemory(d, freeb.mem)
		})
	}
	*b = Buffer{}
}

// ReadPixels copies the region src of the texture into pixels, whose rows
// are stride bytes apart. The image is copied to the staging buffer, the
// command buffer submitted synchronously, and rows repacked into pixels.
// NOTE(review): the srcOff/dstOff names are swapped relative to their roles
// (srcOff walks the destination pixels, dstOff walks the staging memory);
// behavior is nonetheless correct.
func (t *Texture) ReadPixels(src image.Rectangle, pixels []byte, stride int) error {
	if len(pixels) == 0 {
		return nil
	}
	sz := src.Size()
	stageStride := sz.X * 4
	n := sz.Y * stageStride
	stage, mem, off := t.backend.stagingBuffer(n)
	cmdBuf := t.backend.ensureCmdBuf()
	region := vk.BuildBufferImageCopy(off, stageStride/4, src.Min.X, src.Min.Y, sz.X, sz.Y)
	t.imageBarrier(cmdBuf,
		vk.IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
		vk.PIPELINE_STAGE_TRANSFER_BIT,
		vk.ACCESS_TRANSFER_READ_BIT,
	)
	vk.CmdCopyImageToBuffer(cmdBuf, t.img, t.layout, stage.buf, []vk.BufferImageCopy{region})
	stage.scope.stage = vk.PIPELINE_STAGE_TRANSFER_BIT
	stage.scope.access = vk.ACCESS_TRANSFER_WRITE_BIT
	stage.barrier(cmdBuf,
		vk.PIPELINE_STAGE_HOST_BIT,
		vk.ACCESS_HOST_READ_BIT,
	)
	t.backend.submitCmdBuf(true)
	var srcOff, dstOff int
	for y := 0; y < sz.Y; y++ {
		dstRow := pixels[srcOff : srcOff+stageStride]
		srcRow := mem[dstOff : dstOff+stageStride]
		copy(dstRow, srcRow)
		dstOff += stageStride
		srcOff += stride
	}
	return nil
}

// currentCmdBuf returns the command buffer being recorded; it panics when
// called outside a render or compute pass.
func (b *Backend) currentCmdBuf() vk.CommandBuffer {
	cur := b.cmdPool.current
	if cur == nil {
		panic("vulkan: invalid operation outside a render or compute pass")
	}
	return cur
}

// ensureCmdBuf returns the current command buffer, starting recording on a
// pooled (or newly allocated) one if none is active.
func (b *Backend) ensureCmdBuf() vk.CommandBuffer {
	if b.cmdPool.current != nil {
		return b.cmdPool.current
	}
	if b.cmdPool.used < len(b.cmdPool.buffers) {
		// Reuse a previously allocated command buffer.
		buf := b.cmdPool.buffers[b.cmdPool.used]
		b.cmdPool.current = buf
	} else {
		buf, err := vk.AllocateCommandBuffer(b.dev, b.cmdPool.pool)
		if err != nil {
			panic(err)
		}
		b.cmdPool.buffers = append(b.cmdPool.buffers, buf)
		b.cmdPool.current = buf
	}
	b.cmdPool.used++
	buf := b.cmdPool.current
	if err := vk.BeginCommandBuffer(buf); err != nil {
		panic(err)
	}
	return buf
}

// BeginRenderPass starts a render pass targeting tex, honoring the load
// action in d (clear, invalidate or keep) and waiting on the texture's
// acquire semaphore when present.
func (b *Backend) BeginRenderPass(tex driver.Texture, d driver.LoadDesc) {
	t := tex.(*Texture)
	var vkop vk.AttachmentLoadOp
	switch d.Action {
	case driver.LoadActionClear:
		vkop = vk.ATTACHMENT_LOAD_OP_CLEAR
	case driver.LoadActionInvalidate:
		vkop = vk.ATTACHMENT_LOAD_OP_DONT_CARE
	case driver.LoadActionKeep:
		vkop = vk.ATTACHMENT_LOAD_OP_LOAD
	}
	cmdBuf := b.ensureCmdBuf()
	if sem := t.acquire; sem != 0 {
		// The render pass targets a framebuffer that has an associated acquire semaphore.
		// Wait for it by forming an execution barrier.
		b.waitSems = append(b.waitSems, sem)
		b.waitStages = append(b.waitStages, vk.PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)
		// But only for the first pass in a frame.
		t.acquire = 0
	}
	t.imageBarrier(cmdBuf,
		vk.IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
		vk.PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
		vk.ACCESS_COLOR_ATTACHMENT_READ_BIT|vk.ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
	)
	pass := b.lookupPass(t.format, vkop, t.layout, t.passLayout)
	col := d.ClearColor
	vk.CmdBeginRenderPass(cmdBuf, pass, t.fbo, t.width, t.height, [4]float32{col.R, col.G, col.B, col.A})
	t.layout = t.passLayout
	// If the render pass describes an automatic image layout transition to its final layout, there
	// is an implicit image barrier with destination PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT. Make
	// sure any subsequent barrier includes the transition.
	// See also https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#VkSubpassDependency.
	t.scope.stage |= vk.PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
}

// EndRenderPass closes the current render pass.
func (b *Backend) EndRenderPass() {
	vk.CmdEndRenderPass(b.cmdPool.current)
}

// BeginCompute ensures a command buffer is recording for compute commands.
func (b *Backend) BeginCompute() {
	b.ensureCmdBuf()
}

// EndCompute is a no-op; compute work is submitted with the frame's
// command buffer.
func (b *Backend) EndCompute() {
}

// lookupPass returns (creating and caching on first use) a render pass for
// the given format, load op and initial/final image layouts.
func (b *Backend) lookupPass(fmt vk.Format, loadAct vk.AttachmentLoadOp, initLayout, finalLayout vk.ImageLayout) vk.RenderPass {
	key := passKey{fmt: fmt, loadAct: loadAct, initLayout: initLayout, finalLayout: finalLayout}
	if pass, ok := b.passes[key]; ok {
		return pass
	}
	pass, err := vk.CreateRenderPass(b.dev, fmt, loadAct, initLayout, finalLayout, nil)
	if err != nil {
		panic(err)
	}
	b.passes[key] = pass
	return pass
}

// submitCmdBuf ends and submits the current command buffer along with any
// accumulated wait/signal semaphores. When sync is true it submits with the
// backend fence and blocks until the GPU completes.
func (b *Backend) submitCmdBuf(sync bool) {
	buf := b.cmdPool.current
	if buf == nil {
		return
	}
	b.cmdPool.current = nil
	if err := vk.EndCommandBuffer(buf); err != nil {
		panic(err)
	}
	var fence vk.Fence
	if sync {
		fence = b.fence
	}
	if err := vk.QueueSubmit(b.queue, buf, b.waitSems, b.waitStages, b.sigSems, fence); err != nil {
		panic(err)
	}
	b.waitSems = b.waitSems[:0]
	b.sigSems = b.sigSems[:0]
	b.waitStages = b.waitStages[:0]
	if sync {
		vk.WaitForFences(b.dev, b.fence)
		vk.ResetFences(b.dev, b.fence)
	}
}

// stagingBuffer returns a host-visible, host-coherent staging buffer with
// size bytes reserved, plus the mapped memory for that reservation and its
// byte offset within the buffer. The buffer is grown (replacing and
// releasing the old one) when the reservation would not fit.
func (b *Backend) stagingBuffer(size int) (*Buffer, []byte, int) {
	if b.staging.size+size > b.staging.cap {
		if b.staging.buf != nil {
			vk.UnmapMemory(b.dev, b.staging.buf.mem)
			b.staging.buf.Release()
			b.staging.cap = 0
		}
		// Grow to twice the required size to amortize reallocations.
		cap := 2 * (b.staging.size + size)
		buf, err := b.newBuffer(cap, vk.BUFFER_USAGE_TRANSFER_SRC_BIT|vk.BUFFER_USAGE_TRANSFER_DST_BIT,
			vk.MEMORY_PROPERTY_HOST_VISIBLE_BIT|vk.MEMORY_PROPERTY_HOST_COHERENT_BIT)
		if err != nil {
			panic(err)
		}
		mem, err := vk.MapMemory(b.dev, buf.mem, 0, cap)
		if err != nil {
			buf.Release()
			panic(err)
		}
		b.staging.buf = buf
		b.staging.mem = mem
		b.staging.size = 0
		b.staging.cap = cap
	}
	off := b.staging.size
	b.staging.size += size
	mem := b.staging.mem[off : off+size]
	return b.staging.buf, mem, off
}

// formatFor maps a driver texture format to the corresponding Vulkan
// format; unsupported formats panic.
func formatFor(format driver.TextureFormat) vk.Format {
	switch format {
	case driver.TextureFormatRGBA8:
		return vk.FORMAT_R8G8B8A8_UNORM
	case driver.TextureFormatSRGBA:
		return vk.FORMAT_R8G8B8A8_SRGB
	case driver.TextureFormatFloat:
		return vk.FORMAT_R16_SFLOAT
	default:
		panic("unsupported texture format")
	}
}

// mapErr translates VK_ERROR_DEVICE_LOST into the driver-level
// ErrDeviceLost sentinel; other errors pass through unchanged.
func mapErr(err error) error {
	var vkErr vk.Error
	if errors.As(err, &vkErr) && vkErr == vk.ERROR_DEVICE_LOST {
		return driver.ErrDeviceLost
	}
	return err
}

// ImplementsRenderTarget marks *Texture as a driver render target.
func (f *Texture) ImplementsRenderTarget() {}
diff --git a/vendor/gioui.org/gpu/internal/vulkan/vulkan_nosupport.go b/vendor/gioui.org/gpu/internal/vulkan/vulkan_nosupport.go
new file mode 100644
index 0000000..4364a43
--- /dev/null
+++ b/vendor/gioui.org/gpu/internal/vulkan/vulkan_nosupport.go
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+package vulkan
+
+// Empty file to avoid the build error for platforms without Vulkan support.