1 files changed, 689 insertions, 442 deletions
diff --git a/vendor/gioui.org/gpu/gpu.go b/vendor/gioui.org/gpu/gpu.go
index 321b2a8..0bd15c6 100644
--- a/vendor/gioui.org/gpu/gpu.go
+++ b/vendor/gioui.org/gpu/gpu.go
@@ -13,36 +13,57 @@ import (
 	"image"
 	"image/color"
 	"math"
+	"os"
 	"reflect"
 	"time"
 	"unsafe"
 
 	"gioui.org/f32"
-	"gioui.org/gpu/backend"
+	"gioui.org/gpu/internal/driver"
+	"gioui.org/internal/byteslice"
 	"gioui.org/internal/f32color"
-	"gioui.org/internal/opconst"
 	"gioui.org/internal/ops"
-	gunsafe "gioui.org/internal/unsafe"
+	"gioui.org/internal/scene"
+	"gioui.org/internal/stroke"
 	"gioui.org/layout"
 	"gioui.org/op"
-	"gioui.org/op/paint"
+	"gioui.org/shader"
+	"gioui.org/shader/gio"
+
+	// Register backends.
+	_ "gioui.org/gpu/internal/d3d11"
+	_ "gioui.org/gpu/internal/metal"
+	_ "gioui.org/gpu/internal/opengl"
+	_ "gioui.org/gpu/internal/vulkan"
 )
 
-type GPU struct {
+type GPU interface { // GPU is the renderer interface returned by New and NewWithDevice.
+	// Release releases non-Go resources. The GPU is no longer valid after Release.
+	Release()
+	// Clear sets the clear color for the next Frame.
+	Clear(color color.NRGBA)
+	// Frame draws the graphics operations from frame into a viewport of target.
+	Frame(frame *op.Ops, target RenderTarget, viewport image.Point) error
+	// Profile returns the last available profiling information. Profiling
+	// information is requested when Frame sees an io/profile.Op, and the result
+	// is available through Profile at some later time.
+	Profile() string
+}
+
+type gpu struct {
 	cache *resourceCache
 
-	defFBO                                            backend.Framebuffer
-	profile                                           string
-	timers                                            *timers
-	frameStart                                        time.Time
-	zopsTimer, stencilTimer, coverTimer, cleanupTimer *timer
-	drawOps                                           drawOps
-	ctx                                               backend.Device
-	renderer                                          *renderer
+	profile                                string
+	timers                                 *timers
+	frameStart                             time.Time
+	stencilTimer, coverTimer, cleanupTimer *timer
+	drawOps                                drawOps
+	ctx                                    driver.Device
+	renderer                               *renderer
 }
 
 type renderer struct {
-	ctx           backend.Device
+	ctx           driver.Device
 	blitter       *blitter
 	pather        *pather
 	packer        packer
@@ -50,17 +71,15 @@ type renderer struct {
 }
 
 type drawOps struct {
-	profile    bool
-	reader     ops.Reader
-	cache      *resourceCache
-	vertCache  []byte
-	viewport   image.Point
-	clearColor f32color.RGBA
-	imageOps   []imageOp
-	// zimageOps are the rectangle clipped opaque images
-	// that can use fast front-to-back rendering with z-test
-	// and no blending.
-	zimageOps   []imageOp
+	profile     bool
+	reader      ops.Reader
+	states      []f32.Affine2D
+	transStack  []f32.Affine2D
+	vertCache   []byte
+	viewport    image.Point
+	clear       bool
+	clearColor  f32color.RGBA
+	imageOps    []imageOp
 	pathOps     []*pathOp
 	pathOpCache []pathOp
 	qs          quadSplitter
@@ -68,26 +87,35 @@ type drawOps struct {
 }
 
 type drawState struct {
-	clip  f32.Rectangle
 	t     f32.Affine2D
 	cpath *pathOp
-	rect  bool
-	z     int
 
 	matType materialType
 	// Current paint.ImageOp
 	image imageOpData
 	// Current paint.ColorOp, if any.
-	color color.RGBA
+	color color.NRGBA
+
+	// Current paint.LinearGradientOp.
+	stop1  f32.Point
+	stop2  f32.Point
+	color1 color.NRGBA
+	color2 color.NRGBA
 }
 
 type pathOp struct {
 	off f32.Point
+	// rect tracks whether the clip stack can be represented by a
+	// pixel-aligned rectangle.
+	rect bool
 	// clip is the union of all
 	// later clip rectangles.
-	clip      image.Rectangle
-	bounds    f32.Rectangle
-	pathKey   ops.Key
+	clip   image.Rectangle
+	bounds f32.Rectangle
+	// intersect is the intersection of bounds and all
+	// previous clip bounds.
+	intersect f32.Rectangle
+	pathKey   opKey
 	path      bool
 	pathVerts []byte
 	parent    *pathOp
@@ -95,87 +123,70 @@ type pathOp struct {
 }
 
 type imageOp struct {
-	z        float32
 	path     *pathOp
-	off      f32.Point
 	clip     image.Rectangle
 	material material
 	clipType clipType
 	place    placement
 }
 
+func decodeStrokeOp(data []byte) float32 { // decodes the stroke width carried by a stroke op
+	_ = data[4] // needs len(data) >= 5; an out-of-range panic fires here otherwise
+	bo := binary.LittleEndian
+	return math.Float32frombits(bo.Uint32(data[1:])) // little-endian float32 bits in data[1:5]
+}
+
+type quadsOp struct { // path state accumulated by collectOps while decoding ops
+	key opKey // key fields filled from the stroke, path and clip ops
+	aux []byte // op data following the first ops.TypeAuxLen bytes
+}
+
+type opKey struct { // key type of pathOp.pathKey, used for pathCache lookups
+	outline        bool // copied from the decoded clip op's Outline
+	strokeWidth    float32 // value returned by decodeStrokeOp
+	sx, hx, sy, hy float32 // non-offset transform elements, set by SetTransform
+	ops.Key
+}
+
 type material struct {
 	material materialType
 	opaque   bool
 	// For materialTypeColor.
 	color f32color.RGBA
+	// For materialTypeLinearGradient.
+	color1 f32color.RGBA
+	color2 f32color.RGBA
 	// For materialTypeTexture.
-	texture *texture
+	data    imageOpData
 	uvTrans f32.Affine2D
 }
 
-// clipOp is the shadow of clip.Op.
-type clipOp struct {
-	bounds f32.Rectangle
-}
-
 // imageOpData is the shadow of paint.ImageOp.
 type imageOpData struct {
-	rect   image.Rectangle
 	src    *image.RGBA
 	handle interface{}
 }
 
-func (op *clipOp) decode(data []byte) {
-	if opconst.OpType(data[0]) != opconst.TypeClip {
-		panic("invalid op")
-	}
-	bo := binary.LittleEndian
-	r := f32.Rectangle{
-		Min: f32.Point{
-			X: math.Float32frombits(bo.Uint32(data[1:])),
-			Y: math.Float32frombits(bo.Uint32(data[5:])),
-		},
-		Max: f32.Point{
-			X: math.Float32frombits(bo.Uint32(data[9:])),
-			Y: math.Float32frombits(bo.Uint32(data[13:])),
-		},
-	}
-	*op = clipOp{
-		bounds: r,
-	}
+type linearGradientOpData struct {
+	stop1  f32.Point
+	color1 color.NRGBA
+	stop2  f32.Point
+	color2 color.NRGBA
 }
 
 func decodeImageOp(data []byte, refs []interface{}) imageOpData {
-	if opconst.OpType(data[0]) != opconst.TypeImage {
-		panic("invalid op")
-	}
 	handle := refs[1]
 	if handle == nil {
 		return imageOpData{}
 	}
-	bo := binary.LittleEndian
 	return imageOpData{
-		rect: image.Rectangle{
-			Min: image.Point{
-				X: int(bo.Uint32(data[1:])),
-				Y: int(bo.Uint32(data[5:])),
-			},
-			Max: image.Point{
-				X: int(bo.Uint32(data[9:])),
-				Y: int(bo.Uint32(data[13:])),
-			},
-		},
 		src:    refs[0].(*image.RGBA),
 		handle: handle,
 	}
 }
 
-func decodeColorOp(data []byte) color.RGBA {
-	if opconst.OpType(data[0]) != opconst.TypeColor {
-		panic("invalid op")
-	}
-	return color.RGBA{
+func decodeColorOp(data []byte) color.NRGBA {
+	return color.NRGBA{
 		R: data[1],
 		G: data[2],
 		B: data[3],
@@ -183,23 +194,29 @@ func decodeColorOp(data []byte) color.RGBA {
 	}
 }
 
-func decodePaintOp(data []byte) paint.PaintOp {
+func decodeLinearGradientOp(data []byte) linearGradientOpData {
 	bo := binary.LittleEndian
-	if opconst.OpType(data[0]) != opconst.TypePaint {
-		panic("invalid op")
-	}
-	r := f32.Rectangle{
-		Min: f32.Point{
+	return linearGradientOpData{
+		stop1: f32.Point{
 			X: math.Float32frombits(bo.Uint32(data[1:])),
 			Y: math.Float32frombits(bo.Uint32(data[5:])),
 		},
-		Max: f32.Point{
+		stop2: f32.Point{
 			X: math.Float32frombits(bo.Uint32(data[9:])),
 			Y: math.Float32frombits(bo.Uint32(data[13:])),
 		},
-	}
-	return paint.PaintOp{
-		Rect: r,
+		color1: color.NRGBA{
+			R: data[17+0],
+			G: data[17+1],
+			B: data[17+2],
+			A: data[17+3],
+		},
+		color2: color.NRGBA{
+			R: data[21+0],
+			G: data[21+1],
+			B: data[21+2],
+			A: data[21+3],
+		},
 	}
 }
 
@@ -211,58 +228,60 @@ type resource interface {
 
 type texture struct {
 	src *image.RGBA
-	tex backend.Texture
+	tex driver.Texture
 }
 
 type blitter struct {
-	ctx         backend.Device
-	viewport    image.Point
-	prog        [2]*program
-	layout      backend.InputLayout
-	colUniforms *blitColUniforms
-	texUniforms *blitTexUniforms
-	quadVerts   backend.Buffer
+	ctx                    driver.Device
+	viewport               image.Point
+	pipelines              [3]*pipeline
+	colUniforms            *blitColUniforms
+	texUniforms            *blitTexUniforms
+	linearGradientUniforms *blitLinearGradientUniforms
+	quadVerts              driver.Buffer
 }
 
 type blitColUniforms struct {
-	vert struct {
-		blitUniforms
-		_ [12]byte // Padding to a multiple of 16.
-	}
-	frag struct {
-		colorUniforms
-	}
+	blitUniforms
+	_ [128 - unsafe.Sizeof(blitUniforms{}) - unsafe.Sizeof(colorUniforms{})]byte // Padding to 128 bytes.
+	colorUniforms
 }
 
 type blitTexUniforms struct {
-	vert struct {
-		blitUniforms
-		_ [12]byte // Padding to a multiple of 16.
-	}
+	blitUniforms
+}
+
+type blitLinearGradientUniforms struct { // uniforms passed for the materialLinearGradient pipeline
+	blitUniforms
+	_ [128 - unsafe.Sizeof(blitUniforms{}) - unsafe.Sizeof(gradientUniforms{})]byte // Padding to 128 bytes.
+	gradientUniforms
+}
 
 type uniformBuffer struct {
-	buf backend.Buffer
+	buf driver.Buffer
 	ptr []byte
 }
 
-type program struct {
-	prog         backend.Program
-	vertUniforms *uniformBuffer
-	fragUniforms *uniformBuffer
+type pipeline struct {
+	pipeline driver.Pipeline
+	uniforms *uniformBuffer
 }
 
 type blitUniforms struct {
 	transform     [4]float32
 	uvTransformR1 [4]float32
 	uvTransformR2 [4]float32
-	z             float32
 }
 
 type colorUniforms struct {
 	color f32color.RGBA
 }
 
+type gradientUniforms struct { // the two colors embedded in blitLinearGradientUniforms
+	color1 f32color.RGBA
+	color2 f32color.RGBA
+}
+
 type materialType uint8
 
 const (
@@ -273,14 +292,37 @@ const (
 
 const (
 	materialColor materialType = iota
+	materialLinearGradient
 	materialTexture
 )
 
-func New(ctx backend.Device) (*GPU, error) {
-	defFBO := ctx.CurrentFramebuffer()
-	g := &GPU{
-		defFBO: defFBO,
-		cache:  newResourceCache(),
+// New creates a GPU for the given API, using a device from driver.NewDevice.
+func New(api API) (GPU, error) {
+	d, err := driver.NewDevice(api)
+	if err != nil {
+		return nil, err // device creation failed; no GPU is returned
+	}
+	return NewWithDevice(d) // renderer selection happens in NewWithDevice
+}
+
+// NewWithDevice creates a GPU on top of the pre-existing device d. It returns
+// newGPU(d) when d reports both FeatureFloatRenderTargets and FeatureSRGB,
+// and newCompute(d) otherwise. Note: for internal use only.
+func NewWithDevice(d driver.Device) (GPU, error) {
+	d.BeginFrame(nil, false, image.Point{})
+	defer d.EndFrame()
+	// GIORENDERER=forcecompute bypasses the feature test below.
+	forced := os.Getenv("GIORENDERER") == "forcecompute"
+	caps := d.Caps().Features
+	if !forced && caps.Has(driver.FeatureFloatRenderTargets) && caps.Has(driver.FeatureSRGB) {
+		return newGPU(d)
+	}
+	return newCompute(d)
+}
+
+func newGPU(ctx driver.Device) (*gpu, error) {
+	g := &gpu{
+		cache: newResourceCache(),
 	}
 	g.drawOps.pathCache = newOpCache()
 	if err := g.init(ctx); err != nil {
@@ -289,111 +331,116 @@ func New(ctx backend.Device) (*GPU, error) {
 	return g, nil
 }
 
-func (g *GPU) init(ctx backend.Device) error {
+func (g *gpu) init(ctx driver.Device) error {
 	g.ctx = ctx
 	g.renderer = newRenderer(ctx)
 	return nil
 }
 
-func (g *GPU) Release() {
+func (g *gpu) Clear(col color.NRGBA) { // Clear implements GPU.Clear.
+	g.drawOps.clear = true // read and reset to false by the next frame
+	g.drawOps.clearColor = f32color.LinearFromSRGB(col) // stored after conversion by LinearFromSRGB
+}
+
+func (g *gpu) Release() {
 	g.renderer.release()
 	g.drawOps.pathCache.release()
 	g.cache.release()
 	if g.timers != nil {
-		g.timers.release()
+		g.timers.Release()
 	}
+	g.ctx.Release()
+}
+
+func (g *gpu) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error {
+	g.collect(viewport, frameOps)
+	return g.frame(target)
 }
 
-func (g *GPU) Collect(viewport image.Point, frameOps *op.Ops) {
+func (g *gpu) collect(viewport image.Point, frameOps *op.Ops) {
 	g.renderer.blitter.viewport = viewport
 	g.renderer.pather.viewport = viewport
-	g.drawOps.reset(g.cache, viewport)
-	g.drawOps.collect(g.cache, frameOps, viewport)
+	g.drawOps.reset(viewport)
+	g.drawOps.collect(frameOps, viewport)
 	g.frameStart = time.Now()
-	if g.drawOps.profile && g.timers == nil && g.ctx.Caps().Features.Has(backend.FeatureTimers) {
+	if g.drawOps.profile && g.timers == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
 		g.timers = newTimers(g.ctx)
-		g.zopsTimer = g.timers.newTimer()
 		g.stencilTimer = g.timers.newTimer()
 		g.coverTimer = g.timers.newTimer()
 		g.cleanupTimer = g.timers.newTimer()
 	}
-	for _, p := range g.drawOps.pathOps {
-		if v, exists := g.drawOps.pathCache.get(p.pathKey); !exists || v.data.data == nil {
-			data := buildPath(g.ctx, p.pathVerts)
-			g.drawOps.pathCache.put(p.pathKey, opCacheValue{
-				data:   data,
-				bounds: p.bounds,
-			})
-		}
-		p.pathVerts = nil
-	}
 }
 
-func (g *GPU) BeginFrame() {
-	g.ctx.BeginFrame()
-	defer g.ctx.EndFrame()
+func (g *gpu) frame(target RenderTarget) error {
 	viewport := g.renderer.blitter.viewport
+	defFBO := g.ctx.BeginFrame(target, g.drawOps.clear, viewport)
+	defer g.ctx.EndFrame()
+	g.drawOps.buildPaths(g.ctx)
 	for _, img := range g.drawOps.imageOps {
 		expandPathOp(img.path, img.clip)
 	}
-	if g.drawOps.profile {
-		g.zopsTimer.begin()
-	}
-	g.ctx.BindFramebuffer(g.defFBO)
-	g.ctx.DepthFunc(backend.DepthFuncGreater)
-	g.ctx.ClearDepth(0.0)
-	g.ctx.Clear(g.drawOps.clearColor.Float32())
-	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
-	g.renderer.drawZOps(g.drawOps.zimageOps)
-	g.zopsTimer.end()
 	g.stencilTimer.begin()
-	g.ctx.SetBlend(true)
 	g.renderer.packStencils(&g.drawOps.pathOps)
 	g.renderer.stencilClips(g.drawOps.pathCache, g.drawOps.pathOps)
 	g.renderer.packIntersections(g.drawOps.imageOps)
+	g.renderer.prepareIntersections(g.drawOps.imageOps)
 	g.renderer.intersect(g.drawOps.imageOps)
 	g.stencilTimer.end()
 	g.coverTimer.begin()
-	g.ctx.BindFramebuffer(g.defFBO)
+	g.renderer.uploadImages(g.cache, g.drawOps.imageOps)
+	g.renderer.prepareDrawOps(g.cache, g.drawOps.imageOps)
+	d := driver.LoadDesc{
+		ClearColor: g.drawOps.clearColor,
+	}
+	if g.drawOps.clear {
+		g.drawOps.clear = false
+		d.Action = driver.LoadActionClear
+	}
+	g.ctx.BeginRenderPass(defFBO, d)
 	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
-	g.renderer.drawOps(g.drawOps.imageOps)
-	g.ctx.SetBlend(false)
-	g.renderer.pather.stenciler.invalidateFBO()
+	g.renderer.drawOps(g.cache, g.drawOps.imageOps)
 	g.coverTimer.end()
-	g.ctx.BindFramebuffer(g.defFBO)
-}
-
-func (g *GPU) EndFrame() {
+	g.ctx.EndRenderPass()
 	g.cleanupTimer.begin()
 	g.cache.frame()
 	g.drawOps.pathCache.frame()
 	g.cleanupTimer.end()
 	if g.drawOps.profile && g.timers.ready() {
-		zt, st, covt, cleant := g.zopsTimer.Elapsed, g.stencilTimer.Elapsed, g.coverTimer.Elapsed, g.cleanupTimer.Elapsed
-		ft := zt + st + covt + cleant
+		st, covt, cleant := g.stencilTimer.Elapsed, g.coverTimer.Elapsed, g.cleanupTimer.Elapsed
+		ft := st + covt + cleant
 		q := 100 * time.Microsecond
-		zt, st, covt = zt.Round(q), st.Round(q), covt.Round(q)
+		st, covt = st.Round(q), covt.Round(q)
 		frameDur := time.Since(g.frameStart).Round(q)
 		ft = ft.Round(q)
-		g.profile = fmt.Sprintf("draw:%7s gpu:%7s zt:%7s st:%7s cov:%7s", frameDur, ft, zt, st, covt)
+		g.profile = fmt.Sprintf("draw:%7s gpu:%7s st:%7s cov:%7s", frameDur, ft, st, covt)
 	}
+	return nil
 }
 
-func (g *GPU) Profile() string {
+func (g *gpu) Profile() string {
 	return g.profile
 }
 
-func (r *renderer) texHandle(t *texture) backend.Texture {
-	if t.tex != nil {
-		return t.tex
+func (r *renderer) texHandle(cache *resourceCache, data imageOpData) driver.Texture {
+	var tex *texture
+	t, exists := cache.get(data.handle)
+	if !exists {
+		t = &texture{
+			src: data.src,
+		}
+		cache.put(data.handle, t)
 	}
-	tex, err := r.ctx.NewTexture(backend.TextureFormatSRGB, t.src.Bounds().Dx(), t.src.Bounds().Dy(), backend.FilterLinear, backend.FilterLinear, backend.BufferBindingTexture)
+	tex = t.(*texture)
+	if tex.tex != nil {
+		return tex.tex
+	}
+	handle, err := r.ctx.NewTexture(driver.TextureFormatSRGBA, data.src.Bounds().Dx(), data.src.Bounds().Dy(), driver.FilterLinear, driver.FilterLinear, driver.BufferBindingTexture)
 	if err != nil {
 		panic(err)
 	}
-	tex.Upload(t.src)
-	t.tex = tex
-	return t.tex
+	driver.UploadImage(handle, image.Pt(0, 0), data.src)
+	tex.tex = handle
+	return tex.tex
 }
 
 func (t *texture) release() {
@@ -402,14 +449,22 @@ func (t *texture) release() {
 	}
 }
 
-func newRenderer(ctx backend.Device) *renderer {
+func newRenderer(ctx driver.Device) *renderer {
 	r := &renderer{
 		ctx:     ctx,
 		blitter: newBlitter(ctx),
 		pather:  newPather(ctx),
 	}
-	r.packer.maxDim = ctx.Caps().MaxTextureSize
-	r.intersections.maxDim = r.packer.maxDim
+
+	maxDim := ctx.Caps().MaxTextureSize
+	// Large atlas textures cause artifacts due to precision loss in
+	// shaders.
+	if cap := 8192; maxDim > cap {
+		maxDim = cap
+	}
+
+	r.packer.maxDims = image.Pt(maxDim, maxDim)
+	r.intersections.maxDims = image.Pt(maxDim, maxDim)
 	return r
 }
 
@@ -418,13 +473,13 @@ func (r *renderer) release() {
 	r.blitter.release()
 }
 
-func newBlitter(ctx backend.Device) *blitter {
-	quadVerts, err := ctx.NewImmutableBuffer(backend.BufferBindingVertices,
-		gunsafe.BytesView([]float32{
-			-1, +1, 0, 0,
-			+1, +1, 1, 0,
-			-1, -1, 0, 1,
-			+1, -1, 1, 1,
+func newBlitter(ctx driver.Device) *blitter {
+	quadVerts, err := ctx.NewImmutableBuffer(driver.BufferBindingVertices,
+		byteslice.Slice([]float32{
+			-1, -1, 0, 0,
+			+1, -1, 1, 0,
+			-1, +1, 0, 1,
+			+1, +1, 1, 1,
 		}),
 	)
 	if err != nil {
@@ -436,65 +491,125 @@ func newBlitter(ctx backend.Device) *blitter {
 	}
 	b.colUniforms = new(blitColUniforms)
 	b.texUniforms = new(blitTexUniforms)
-	prog, layout, err := createColorPrograms(ctx, shader_blit_vert, shader_blit_frag,
-		[2]interface{}{&b.colUniforms.vert, &b.texUniforms.vert}, [2]interface{}{&b.colUniforms.frag, nil})
+	b.linearGradientUniforms = new(blitLinearGradientUniforms)
+	pipelines, err := createColorPrograms(ctx, gio.Shader_blit_vert, gio.Shader_blit_frag,
+		[3]interface{}{b.colUniforms, b.linearGradientUniforms, b.texUniforms},
+	)
 	if err != nil {
 		panic(err)
 	}
-	b.prog = prog
-	b.layout = layout
+	b.pipelines = pipelines
 	return b
 }
 
 func (b *blitter) release() {
 	b.quadVerts.Release()
-	for _, p := range b.prog {
+	for _, p := range b.pipelines {
 		p.Release()
 	}
-	b.layout.Release()
 }
 
-func createColorPrograms(b backend.Device, vsSrc backend.ShaderSources, fsSrc [2]backend.ShaderSources, vertUniforms, fragUniforms [2]interface{}) ([2]*program, backend.InputLayout, error) {
-	var progs [2]*program
-	prog, err := b.NewProgram(vsSrc, fsSrc[materialTexture])
-	if err != nil {
-		return progs, nil, err
+func createColorPrograms(b driver.Device, vsSrc shader.Sources, fsSrc [3]shader.Sources, uniforms [3]interface{}) ([3]*pipeline, error) {
+	var pipelines [3]*pipeline
+	blend := driver.BlendDesc{
+		Enable:    true,
+		SrcFactor: driver.BlendFactorOne,
+		DstFactor: driver.BlendFactorOneMinusSrcAlpha,
 	}
-	var vertBuffer *uniformBuffer
-	if u := vertUniforms[materialTexture]; u != nil {
-		vertBuffer = newUniformBuffer(b, u)
-		prog.SetVertexUniforms(vertBuffer.buf)
-	}
-	var fragBuffer *uniformBuffer
-	if u := fragUniforms[materialTexture]; u != nil {
-		fragBuffer = newUniformBuffer(b, u)
-		prog.SetFragmentUniforms(fragBuffer.buf)
+	layout := driver.VertexLayout{
+		Inputs: []driver.InputDesc{
+			{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
+			{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
+		},
+		Stride: 4 * 4,
 	}
-	progs[materialTexture] = newProgram(prog, vertBuffer, fragBuffer)
-	prog, err = b.NewProgram(vsSrc, fsSrc[materialColor])
+	vsh, err := b.NewVertexShader(vsSrc)
 	if err != nil {
-		progs[materialTexture].Release()
-		return progs, nil, err
-	}
-	if u := vertUniforms[materialColor]; u != nil {
-		vertBuffer = newUniformBuffer(b, u)
-		prog.SetVertexUniforms(vertBuffer.buf)
-	}
-	if u := fragUniforms[materialColor]; u != nil {
-		fragBuffer = newUniformBuffer(b, u)
-		prog.SetFragmentUniforms(fragBuffer.buf)
-	}
-	progs[materialColor] = newProgram(prog, vertBuffer, fragBuffer)
-	layout, err := b.NewInputLayout(vsSrc, []backend.InputDesc{
-		{Type: backend.DataTypeFloat, Size: 2, Offset: 0},
-		{Type: backend.DataTypeFloat, Size: 2, Offset: 4 * 2},
-	})
+		return pipelines, err
+	}
+	defer vsh.Release()
+	{
+		fsh, err := b.NewFragmentShader(fsSrc[materialTexture])
+		if err != nil {
+			return pipelines, err
+		}
+		defer fsh.Release()
+		pipe, err := b.NewPipeline(driver.PipelineDesc{
+			VertexShader:   vsh,
+			FragmentShader: fsh,
+			BlendDesc:      blend,
+			VertexLayout:   layout,
+			PixelFormat:    driver.TextureFormatOutput,
+			Topology:       driver.TopologyTriangleStrip,
+		})
+		if err != nil {
+			return pipelines, err
+		}
+		var vertBuffer *uniformBuffer
+		if u := uniforms[materialTexture]; u != nil {
+			vertBuffer = newUniformBuffer(b, u)
+		}
+		pipelines[materialTexture] = &pipeline{pipe, vertBuffer}
+	}
+	{
+		var vertBuffer *uniformBuffer
+		fsh, err := b.NewFragmentShader(fsSrc[materialColor])
+		if err != nil {
+			pipelines[materialTexture].Release()
+			return pipelines, err
+		}
+		defer fsh.Release()
+		pipe, err := b.NewPipeline(driver.PipelineDesc{
+			VertexShader:   vsh,
+			FragmentShader: fsh,
+			BlendDesc:      blend,
+			VertexLayout:   layout,
+			PixelFormat:    driver.TextureFormatOutput,
+			Topology:       driver.TopologyTriangleStrip,
+		})
+		if err != nil {
+			pipelines[materialTexture].Release()
+			return pipelines, err
+		}
+		if u := uniforms[materialColor]; u != nil {
+			vertBuffer = newUniformBuffer(b, u)
+		}
+		pipelines[materialColor] = &pipeline{pipe, vertBuffer}
+	}
+	{
+		var vertBuffer *uniformBuffer
+		fsh, err := b.NewFragmentShader(fsSrc[materialLinearGradient])
+		if err != nil {
+			pipelines[materialTexture].Release()
+			pipelines[materialColor].Release()
+			return pipelines, err
+		}
+		defer fsh.Release()
+		pipe, err := b.NewPipeline(driver.PipelineDesc{
+			VertexShader:   vsh,
+			FragmentShader: fsh,
+			BlendDesc:      blend,
+			VertexLayout:   layout,
+			PixelFormat:    driver.TextureFormatOutput,
+			Topology:       driver.TopologyTriangleStrip,
+		})
+		if err != nil {
+			pipelines[materialTexture].Release()
+			pipelines[materialColor].Release()
+			return pipelines, err
+		}
+		if u := uniforms[materialLinearGradient]; u != nil {
+			vertBuffer = newUniformBuffer(b, u)
+		}
+		pipelines[materialLinearGradient] = &pipeline{pipe, vertBuffer}
+	}
 	if err != nil {
-		progs[materialTexture].Release()
-		progs[materialColor].Release()
-		return progs, nil, err
+		for _, p := range pipelines {
+			p.Release()
+		}
+		return pipelines, err
 	}
-	return progs, layout, nil
+	return pipelines, nil
 }
 
 func (r *renderer) stencilClips(pathCache *opCache, ops []*pathOp) {
@@ -505,14 +620,31 @@ func (r *renderer) stencilClips(pathCache *opCache, ops []*pathOp) {
 	r.pather.begin(r.packer.sizes)
 	for _, p := range ops {
 		if fbo != p.place.Idx {
+			if fbo != -1 {
+				r.ctx.EndRenderPass()
+			}
 			fbo = p.place.Idx
 			f := r.pather.stenciler.cover(fbo)
-			r.ctx.BindFramebuffer(f.fbo)
-			r.ctx.Clear(0.0, 0.0, 0.0, 0.0)
+			r.ctx.BeginRenderPass(f.tex, driver.LoadDesc{Action: driver.LoadActionClear})
+			r.ctx.BindPipeline(r.pather.stenciler.pipeline.pipeline.pipeline)
+			r.ctx.BindIndexBuffer(r.pather.stenciler.indexBuf)
 		}
 		v, _ := pathCache.get(p.pathKey)
 		r.pather.stencilPath(p.clip, p.off, p.place.Pos, v.data)
 	}
+	if fbo != -1 {
+		r.ctx.EndRenderPass()
+	}
+}
+
+func (r *renderer) prepareIntersections(ops []imageOp) {
+	for _, img := range ops {
+		if img.clipType != clipTypeIntersection {
+			continue
+		}
+		fbo := r.pather.stenciler.cover(img.path.place.Idx)
+		r.ctx.PrepareTexture(fbo.tex)
+	}
 }
 
 func (r *renderer) intersect(ops []imageOp) {
@@ -521,21 +653,28 @@ func (r *renderer) intersect(ops []imageOp) {
 	}
 	fbo := -1
 	r.pather.stenciler.beginIntersect(r.intersections.sizes)
-	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
-	r.ctx.BindInputLayout(r.pather.stenciler.iprog.layout)
 	for _, img := range ops {
 		if img.clipType != clipTypeIntersection {
 			continue
 		}
 		if fbo != img.place.Idx {
+			if fbo != -1 {
+				r.ctx.EndRenderPass()
+			}
 			fbo = img.place.Idx
 			f := r.pather.stenciler.intersections.fbos[fbo]
-			r.ctx.BindFramebuffer(f.fbo)
-			r.ctx.Clear(1.0, 0.0, 0.0, 0.0)
+			d := driver.LoadDesc{Action: driver.LoadActionClear}
+			d.ClearColor.R = 1.0
+			r.ctx.BeginRenderPass(f.tex, d)
+			r.ctx.BindPipeline(r.pather.stenciler.ipipeline.pipeline.pipeline)
+			r.ctx.BindVertexBuffer(r.blitter.quadVerts, 0)
 		}
 		r.ctx.Viewport(img.place.Pos.X, img.place.Pos.Y, img.clip.Dx(), img.clip.Dy())
 		r.intersectPath(img.path, img.clip)
 	}
+	if fbo != -1 {
+		r.ctx.EndRenderPass()
+	}
 }
 
 func (r *renderer) intersectPath(p *pathOp, clip image.Rectangle) {
@@ -556,12 +695,12 @@ func (r *renderer) intersectPath(p *pathOp, clip image.Rectangle) {
 	}
 	fbo := r.pather.stenciler.cover(p.place.Idx)
 	r.ctx.BindTexture(0, fbo.tex)
-	coverScale, coverOff := texSpaceTransform(toRectF(uv), fbo.size)
-	subScale, subOff := texSpaceTransform(toRectF(sub), p.clip.Size())
-	r.pather.stenciler.iprog.uniforms.vert.uvTransform = [4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y}
-	r.pather.stenciler.iprog.uniforms.vert.subUVTransform = [4]float32{subScale.X, subScale.Y, subOff.X, subOff.Y}
-	r.pather.stenciler.iprog.prog.UploadUniforms()
-	r.ctx.DrawArrays(backend.DrawModeTriangleStrip, 0, 4)
+	coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size)
+	subScale, subOff := texSpaceTransform(layout.FRect(sub), p.clip.Size())
+	r.pather.stenciler.ipipeline.uniforms.vert.uvTransform = [4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y}
+	r.pather.stenciler.ipipeline.uniforms.vert.subUVTransform = [4]float32{subScale.X, subScale.Y, subOff.X, subOff.Y}
+	r.pather.stenciler.ipipeline.pipeline.UploadUniforms(r.ctx)
+	r.ctx.DrawArrays(0, 4)
 }
 
 func (r *renderer) packIntersections(ops []imageOp) {
@@ -611,7 +750,7 @@ func (r *renderer) packStencils(pops *[]*pathOp) {
 		if !ok {
 			// The clip area is at most the entire screen. Hopefully no
 			// screen is larger than GL_MAX_TEXTURE_SIZE.
-			panic(fmt.Errorf("clip area %v is larger than maximum texture size %dx%d", p.clip, r.packer.maxDim, r.packer.maxDim))
+			panic(fmt.Errorf("clip area %v is larger than maximum texture size %v", p.clip, r.packer.maxDims))
 		}
 		p.place = place
 		i++
@@ -619,8 +758,7 @@ func (r *renderer) packStencils(pops *[]*pathOp) {
 	*pops = ops
 }
 
-// intersects intersects clip and b where b is offset by off.
-// ceilRect returns a bounding image.Rectangle for a f32.Rectangle.
+// boundRectF returns a bounding image.Rectangle for a f32.Rectangle.
 func boundRectF(r f32.Rectangle) image.Rectangle {
 	return image.Rectangle{
 		Min: image.Point{
@@ -634,19 +772,6 @@ func boundRectF(r f32.Rectangle) image.Rectangle {
 	}
 }
 
-func toRectF(r image.Rectangle) f32.Rectangle {
-	return f32.Rectangle{
-		Min: f32.Point{
-			X: float32(r.Min.X),
-			Y: float32(r.Min.Y),
-		},
-		Max: f32.Point{
-			X: float32(r.Max.X),
-			Y: float32(r.Max.Y),
-		},
-	}
-}
-
 func ceil(v float32) int {
 	return int(math.Ceil(float64(v)))
 }
@@ -655,30 +780,39 @@ func floor(v float32) int {
 	return int(math.Floor(float64(v)))
 }
 
-func (d *drawOps) reset(cache *resourceCache, viewport image.Point) {
+func (d *drawOps) reset(viewport image.Point) {
 	d.profile = false
-	d.clearColor = f32color.RGBA{R: 1.0, G: 1.0, B: 1.0, A: 1.0}
-	d.cache = cache
 	d.viewport = viewport
 	d.imageOps = d.imageOps[:0]
-	d.zimageOps = d.zimageOps[:0]
 	d.pathOps = d.pathOps[:0]
 	d.pathOpCache = d.pathOpCache[:0]
 	d.vertCache = d.vertCache[:0]
+	d.transStack = d.transStack[:0]
 }
 
-func (d *drawOps) collect(cache *resourceCache, root *op.Ops, viewport image.Point) {
-	d.reset(cache, viewport)
-	clip := f32.Rectangle{
+func (d *drawOps) collect(root *op.Ops, viewport image.Point) {
+	viewf := f32.Rectangle{
 		Max: f32.Point{X: float32(viewport.X), Y: float32(viewport.Y)},
 	}
-	d.reader.Reset(root)
-	state := drawState{
-		clip:  clip,
-		rect:  true,
-		color: color.RGBA{A: 0xff},
+	var ops *ops.Ops
+	if root != nil {
+		ops = &root.Internal
+	}
+	d.reader.Reset(ops)
+	d.collectOps(&d.reader, viewf)
+}
+
+func (d *drawOps) buildPaths(ctx driver.Device) {
+	for _, p := range d.pathOps {
+		if v, exists := d.pathCache.get(p.pathKey); !exists || v.data.data == nil {
+			data := buildPath(ctx, p.pathVerts)
+			d.pathCache.put(p.pathKey, opCacheValue{
+				data:   data,
+				bounds: p.bounds,
+			})
+		}
+		p.pathVerts = nil
 	}
-	d.collectOps(&d.reader, state)
 }
 
 func (d *drawOps) newPathOp() *pathOp {
@@ -686,24 +820,30 @@ func (d *drawOps) newPathOp() *pathOp {
 	return &d.pathOpCache[len(d.pathOpCache)-1]
 }
 
-func (d *drawOps) addClipPath(state *drawState, aux []byte, auxKey ops.Key, bounds f32.Rectangle, off f32.Point) {
+func (d *drawOps) addClipPath(state *drawState, aux []byte, auxKey opKey, bounds f32.Rectangle, off f32.Point, push bool) {
 	npath := d.newPathOp()
 	*npath = pathOp{
-		parent: state.cpath,
-		bounds: bounds,
-		off:    off,
+		parent:    state.cpath,
+		bounds:    bounds,
+		off:       off,
+		intersect: bounds.Add(off),
+		rect:      true,
+	}
+	if npath.parent != nil {
+		npath.rect = npath.parent.rect
+		npath.intersect = npath.parent.intersect.Intersect(npath.intersect)
 	}
-	state.cpath = npath
 	if len(aux) > 0 {
-		state.rect = false
-		state.cpath.pathKey = auxKey
-		state.cpath.path = true
-		state.cpath.pathVerts = aux
-		d.pathOps = append(d.pathOps, state.cpath)
+		npath.rect = false
+		npath.pathKey = auxKey
+		npath.path = true
+		npath.pathVerts = aux
+		d.pathOps = append(d.pathOps, npath)
 	}
+	state.cpath = npath
 }
 
-// split a transform into two parts, one which is pur offset and the
+// split a transform into two parts, one which is pure offset and the
 // other representing the scaling, shearing and rotation part
 func splitTransform(t f32.Affine2D) (srs f32.Affine2D, offset f32.Point) {
 	sx, hx, ox, hy, sy, oy := t.Elems()
@@ -712,121 +852,169 @@ func splitTransform(t f32.Affine2D) (srs f32.Affine2D, offset f32.Point) {
 	return
 }
 
-func (d *drawOps) collectOps(r *ops.Reader, state drawState) int {
-	var aux []byte
-	var auxKey ops.Key
+func (d *drawOps) save(id int, state f32.Affine2D) {
+	if extra := id - len(d.states) + 1; extra > 0 {
+		d.states = append(d.states, make([]f32.Affine2D, extra)...)
+	}
+	d.states[id] = state
+}
+
+func (k opKey) SetTransform(t f32.Affine2D) opKey {
+	sx, hx, _, hy, sy, _ := t.Elems()
+	k.sx = sx
+	k.hx = hx
+	k.hy = hy
+	k.sy = sy
+	return k
+}
+
+func (d *drawOps) collectOps(r *ops.Reader, viewport f32.Rectangle) {
+	var (
+		quads quadsOp
+		state drawState
+	)
+	reset := func() {
+		state = drawState{
+			color: color.NRGBA{A: 0xff},
+		}
+	}
+	reset()
 loop:
 	for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() {
-		switch opconst.OpType(encOp.Data[0]) {
-		case opconst.TypeProfile:
+		switch ops.OpType(encOp.Data[0]) {
+		case ops.TypeProfile:
 			d.profile = true
-		case opconst.TypeTransform:
-			dop := ops.DecodeTransform(encOp.Data)
+		case ops.TypeTransform:
+			dop, push := ops.DecodeTransform(encOp.Data)
+			if push {
+				d.transStack = append(d.transStack, state.t)
+			}
 			state.t = state.t.Mul(dop)
-		case opconst.TypeAux:
-			aux = encOp.Data[opconst.TypeAuxLen:]
-			auxKey = encOp.Key
-		case opconst.TypeClip:
-			var op clipOp
-			op.decode(encOp.Data)
-			bounds := op.bounds
+		case ops.TypePopTransform:
+			n := len(d.transStack)
+			state.t = d.transStack[n-1]
+			d.transStack = d.transStack[:n-1]
+
+		case ops.TypeStroke:
+			quads.key.strokeWidth = decodeStrokeOp(encOp.Data)
+
+		case ops.TypePath:
+			encOp, ok = r.Decode()
+			if !ok {
+				break loop
+			}
+			quads.aux = encOp.Data[ops.TypeAuxLen:]
+			quads.key.Key = encOp.Key
+
+		case ops.TypeClip:
+			var op ops.ClipOp
+			op.Decode(encOp.Data)
+			quads.key.outline = op.Outline
+			bounds := layout.FRect(op.Bounds)
 			trans, off := splitTransform(state.t)
-			if len(aux) > 0 {
+			if len(quads.aux) > 0 {
 				// There is a clipping path, build the gpu data and update the
 				// cache key such that it will be equal only if the transform is the
 				// same also. Use cached data if we have it.
-				auxKey = auxKey.SetTransform(trans)
-				if v, ok := d.pathCache.get(auxKey); ok {
+				quads.key = quads.key.SetTransform(trans)
+				if v, ok := d.pathCache.get(quads.key); ok {
 					// Since the GPU data exists in the cache aux will not be used.
 					// Why is this not used for the offset shapes?
-					op.bounds = v.bounds
+					bounds = v.bounds
 				} else {
-					aux, op.bounds = d.buildVerts(aux, trans)
+					var pathData []byte
+					pathData, bounds = d.buildVerts(
+						quads.aux, trans, quads.key.outline, quads.key.strokeWidth,
+					)
+					quads.aux = pathData
 					// add it to the cache, without GPU data, so the transform can be
 					// reused.
-					d.pathCache.put(auxKey, opCacheValue{bounds: op.bounds})
+					d.pathCache.put(quads.key, opCacheValue{bounds: bounds})
 				}
 			} else {
-				aux, op.bounds, _ = d.boundsForTransformedRect(bounds, trans)
-				auxKey = encOp.Key
-				auxKey.SetTransform(trans)
+				quads.aux, bounds, _ = d.boundsForTransformedRect(bounds, trans)
+				quads.key = opKey{Key: encOp.Key}
 			}
-			state.clip = state.clip.Intersect(op.bounds.Add(off))
-			d.addClipPath(&state, aux, auxKey, op.bounds, off)
-			aux = nil
-			auxKey = ops.Key{}
-		case opconst.TypeColor:
+			d.addClipPath(&state, quads.aux, quads.key, bounds, off, true)
+			quads = quadsOp{}
+		case ops.TypePopClip:
+			state.cpath = state.cpath.parent
+
+		case ops.TypeColor:
 			state.matType = materialColor
 			state.color = decodeColorOp(encOp.Data)
-		case opconst.TypeImage:
+		case ops.TypeLinearGradient:
+			state.matType = materialLinearGradient
+			op := decodeLinearGradientOp(encOp.Data)
+			state.stop1 = op.stop1
+			state.stop2 = op.stop2
+			state.color1 = op.color1
+			state.color2 = op.color2
+		case ops.TypeImage:
 			state.matType = materialTexture
 			state.image = decodeImageOp(encOp.Data, encOp.Refs)
-		case opconst.TypePaint:
-			op := decodePaintOp(encOp.Data)
+		case ops.TypePaint:
 			// Transform (if needed) the painting rectangle and if so generate a clip path,
 			// for those cases also compute a partialTrans that maps texture coordinates between
 			// the new bounding rectangle and the transformed original paint rectangle.
-			trans, off := splitTransform(state.t)
-			clipData, bnd, partialTrans := d.boundsForTransformedRect(op.Rect, trans)
-			clip := state.clip.Intersect(bnd.Add(off))
-			if clip.Empty() {
+			t, off := splitTransform(state.t)
+			// Fill the clip area, unless the material is a (bounded) image.
+			// TODO: Find a tighter bound.
+			inf := float32(1e6)
+			dst := f32.Rect(-inf, -inf, inf, inf)
+			if state.matType == materialTexture {
+				sz := state.image.src.Rect.Size()
+				dst = f32.Rectangle{Max: layout.FPt(sz)}
+			}
+			clipData, bnd, partialTrans := d.boundsForTransformedRect(dst, t)
+			cl := viewport.Intersect(bnd.Add(off))
+			if state.cpath != nil {
+				cl = state.cpath.intersect.Intersect(cl)
+			}
+			if cl.Empty() {
 				continue
 			}
 
-			wasrect := state.rect
 			if clipData != nil {
 				// The paint operation is sheared or rotated, add a clip path representing
 				// this transformed rectangle.
-				encOp.Key.SetTransform(trans)
-				d.addClipPath(&state, clipData, encOp.Key, bnd, off)
+				k := opKey{Key: encOp.Key}
+				k.SetTransform(t) // TODO: This call has no effect.
+				d.addClipPath(&state, clipData, k, bnd, off, false)
 			}
 
-			bounds := boundRectF(clip)
-			mat := state.materialFor(d.cache, bnd, off, partialTrans, bounds)
+			bounds := boundRectF(cl)
+			mat := state.materialFor(bnd, off, partialTrans, bounds)
 
-			if bounds.Min == (image.Point{}) && bounds.Max == d.viewport && state.rect && mat.opaque && mat.material == materialColor {
+			rect := state.cpath == nil || state.cpath.rect
+			if bounds.Min == (image.Point{}) && bounds.Max == d.viewport && rect && mat.opaque && (mat.material == materialColor) {
 				// The image is a uniform opaque color and takes up the whole screen.
 				// Scrap images up to and including this image and set clear color.
-				d.zimageOps = d.zimageOps[:0]
 				d.imageOps = d.imageOps[:0]
-				state.z = 0
 				d.clearColor = mat.color.Opaque()
+				d.clear = true
 				continue
 			}
-			state.z++
-			if state.z != int(uint16(state.z)) {
-				// TODO(eliasnaur) gioui.org/issue/127.
-				panic("more than 65k paint objects not supported")
-			}
-			// Assume 16-bit depth buffer.
-			const zdepth = 1 << 16
-			// Convert z to window-space, assuming depth range [0;1].
-			zf := float32(state.z)*2/zdepth - 1.0
 			img := imageOp{
-				z:        zf,
 				path:     state.cpath,
-				off:      off,
 				clip:     bounds,
 				material: mat,
 			}
 
-			if state.rect && img.material.opaque {
-				d.zimageOps = append(d.zimageOps, img)
-			} else {
-				d.imageOps = append(d.imageOps, img)
-			}
+			d.imageOps = append(d.imageOps, img)
 			if clipData != nil {
 				// we added a clip path that should not remain
 				state.cpath = state.cpath.parent
-				state.rect = wasrect
 			}
-		case opconst.TypePush:
-			state.z = d.collectOps(r, state)
-		case opconst.TypePop:
-			break loop
+		case ops.TypeSave:
+			id := ops.DecodeSave(encOp.Data)
+			d.save(id, state.t)
+		case ops.TypeLoad:
+			reset()
+			id := ops.DecodeLoad(encOp.Data)
+			state.t = d.states[id]
 		}
 	}
-	return state.z
 }
 
 func expandPathOp(p *pathOp, clip image.Rectangle) {
@@ -840,78 +1028,83 @@ func expandPathOp(p *pathOp, clip image.Rectangle) {
 	}
 }
 
-func (d *drawState) materialFor(cache *resourceCache, rect f32.Rectangle, off f32.Point, trans f32.Affine2D, clip image.Rectangle) material {
+func (d *drawState) materialFor(rect f32.Rectangle, off f32.Point, partTrans f32.Affine2D, clip image.Rectangle) material {
 	var m material
 	switch d.matType {
 	case materialColor:
 		m.material = materialColor
-		m.color = f32color.RGBAFromSRGB(d.color)
+		m.color = f32color.LinearFromSRGB(d.color)
 		m.opaque = m.color.A == 1.0
+	case materialLinearGradient:
+		m.material = materialLinearGradient
+
+		m.color1 = f32color.LinearFromSRGB(d.color1)
+		m.color2 = f32color.LinearFromSRGB(d.color2)
+		m.opaque = m.color1.A == 1.0 && m.color2.A == 1.0
+
+		m.uvTrans = partTrans.Mul(gradientSpaceTransform(clip, off, d.stop1, d.stop2))
 	case materialTexture:
 		m.material = materialTexture
 		dr := boundRectF(rect.Add(off))
 		sz := d.image.src.Bounds().Size()
-		sr := layout.FRect(d.image.rect)
-		if dx := float32(dr.Dx()); dx != 0 {
-			// Don't clip 1 px width sources.
-			if sdx := sr.Dx(); sdx > 1 {
-				sr.Min.X += (float32(clip.Min.X-dr.Min.X)*sdx + dx/2) / dx
-				sr.Max.X -= (float32(dr.Max.X-clip.Max.X)*sdx + dx/2) / dx
-			}
-		}
-		if dy := float32(dr.Dy()); dy != 0 {
-			// Don't clip 1 px height sources.
-			if sdy := sr.Dy(); sdy > 1 {
-				sr.Min.Y += (float32(clip.Min.Y-dr.Min.Y)*sdy + dy/2) / dy
-				sr.Max.Y -= (float32(dr.Max.Y-clip.Max.Y)*sdy + dy/2) / dy
-			}
-		}
-		tex, exists := cache.get(d.image.handle)
-		if !exists {
-			t := &texture{
-				src: d.image.src,
-			}
-			cache.put(d.image.handle, t)
-			tex = t
+		sr := f32.Rectangle{
+			Max: f32.Point{
+				X: float32(sz.X),
+				Y: float32(sz.Y),
+			},
 		}
-		m.texture = tex.(*texture)
+		dx := float32(dr.Dx())
+		sdx := sr.Dx()
+		sr.Min.X += float32(clip.Min.X-dr.Min.X) * sdx / dx
+		sr.Max.X -= float32(dr.Max.X-clip.Max.X) * sdx / dx
+		dy := float32(dr.Dy())
+		sdy := sr.Dy()
+		sr.Min.Y += float32(clip.Min.Y-dr.Min.Y) * sdy / dy
+		sr.Max.Y -= float32(dr.Max.Y-clip.Max.Y) * sdy / dy
 		uvScale, uvOffset := texSpaceTransform(sr, sz)
-		m.uvTrans = trans.Mul(f32.Affine2D{}.Scale(f32.Point{}, uvScale).Offset(uvOffset))
+		m.uvTrans = partTrans.Mul(f32.Affine2D{}.Scale(f32.Point{}, uvScale).Offset(uvOffset))
+		m.data = d.image
 	}
 	return m
 }
 
-func (r *renderer) drawZOps(ops []imageOp) {
-	r.ctx.SetDepthTest(true)
-	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
-	r.ctx.BindInputLayout(r.blitter.layout)
-	// Render front to back.
-	for i := len(ops) - 1; i >= 0; i-- {
-		img := ops[i]
+func (r *renderer) uploadImages(cache *resourceCache, ops []imageOp) {
+	for _, img := range ops {
+		m := img.material
+		if m.material == materialTexture {
+			r.texHandle(cache, m.data)
+		}
+	}
+}
+
+func (r *renderer) prepareDrawOps(cache *resourceCache, ops []imageOp) {
+	for _, img := range ops {
 		m := img.material
 		switch m.material {
 		case materialTexture:
-			r.ctx.BindTexture(0, r.texHandle(m.texture))
+			r.ctx.PrepareTexture(r.texHandle(cache, m.data))
 		}
-		drc := img.clip
-		scale, off := clipSpaceTransform(drc, r.blitter.viewport)
-		r.blitter.blit(img.z, m.material, m.color, scale, off, m.uvTrans)
+
+		var fbo stencilFBO
+		switch img.clipType {
+		case clipTypeNone:
+			continue
+		case clipTypePath:
+			fbo = r.pather.stenciler.cover(img.place.Idx)
+		case clipTypeIntersection:
+			fbo = r.pather.stenciler.intersections.fbos[img.place.Idx]
+		}
+		r.ctx.PrepareTexture(fbo.tex)
 	}
-	r.ctx.SetDepthTest(false)
 }
 
-func (r *renderer) drawOps(ops []imageOp) {
-	r.ctx.SetDepthTest(true)
-	r.ctx.DepthMask(false)
-	r.ctx.BlendFunc(backend.BlendFactorOne, backend.BlendFactorOneMinusSrcAlpha)
-	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
-	r.ctx.BindInputLayout(r.pather.coverer.layout)
-	var coverTex backend.Texture
+func (r *renderer) drawOps(cache *resourceCache, ops []imageOp) {
+	var coverTex driver.Texture
 	for _, img := range ops {
 		m := img.material
 		switch m.material {
 		case materialTexture:
-			r.ctx.BindTexture(0, r.texHandle(m.texture))
+			r.ctx.BindTexture(0, r.texHandle(cache, m.data))
 		}
 		drc := img.clip
 
@@ -919,7 +1112,10 @@ func (r *renderer) drawOps(ops []imageOp) {
 		var fbo stencilFBO
 		switch img.clipType {
 		case clipTypeNone:
-			r.blitter.blit(img.z, m.material, m.color, scale, off, m.uvTrans)
+			p := r.blitter.pipelines[m.material]
+			r.ctx.BindPipeline(p.pipeline)
+			r.ctx.BindVertexBuffer(r.blitter.quadVerts, 0)
+			r.blitter.blit(m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans)
 			continue
 		case clipTypePath:
 			fbo = r.pather.stenciler.cover(img.place.Idx)
@@ -934,42 +1130,50 @@ func (r *renderer) drawOps(ops []imageOp) {
 			Min: img.place.Pos,
 			Max: img.place.Pos.Add(drc.Size()),
 		}
-		coverScale, coverOff := texSpaceTransform(toRectF(uv), fbo.size)
-		r.pather.cover(img.z, m.material, m.color, scale, off, m.uvTrans, coverScale, coverOff)
+		coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size)
+		p := r.pather.coverer.pipelines[m.material]
+		r.ctx.BindPipeline(p.pipeline)
+		r.ctx.BindVertexBuffer(r.blitter.quadVerts, 0)
+		r.pather.cover(m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans, coverScale, coverOff)
 	}
-	r.ctx.DepthMask(true)
-	r.ctx.SetDepthTest(false)
 }
 
-func (b *blitter) blit(z float32, mat materialType, col f32color.RGBA, scale, off f32.Point, uvTrans f32.Affine2D) {
-	p := b.prog[mat]
-	b.ctx.BindProgram(p.prog)
+func (b *blitter) blit(mat materialType, col f32color.RGBA, col1, col2 f32color.RGBA, scale, off f32.Point, uvTrans f32.Affine2D) {
+	p := b.pipelines[mat]
+	b.ctx.BindPipeline(p.pipeline)
 	var uniforms *blitUniforms
 	switch mat {
 	case materialColor:
-		b.colUniforms.frag.color = col
-		uniforms = &b.colUniforms.vert.blitUniforms
+		b.colUniforms.color = col
+		uniforms = &b.colUniforms.blitUniforms
 	case materialTexture:
 		t1, t2, t3, t4, t5, t6 := uvTrans.Elems()
-		b.texUniforms.vert.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
-		b.texUniforms.vert.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
-		uniforms = &b.texUniforms.vert.blitUniforms
+		b.texUniforms.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
+		b.texUniforms.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
+		uniforms = &b.texUniforms.blitUniforms
+	case materialLinearGradient:
+		b.linearGradientUniforms.color1 = col1
+		b.linearGradientUniforms.color2 = col2
+
+		t1, t2, t3, t4, t5, t6 := uvTrans.Elems()
+		b.linearGradientUniforms.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
+		b.linearGradientUniforms.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
+		uniforms = &b.linearGradientUniforms.blitUniforms
 	}
-	uniforms.z = z
 	uniforms.transform = [4]float32{scale.X, scale.Y, off.X, off.Y}
-	p.UploadUniforms()
-	b.ctx.DrawArrays(backend.DrawModeTriangleStrip, 0, 4)
+	p.UploadUniforms(b.ctx)
+	b.ctx.DrawArrays(0, 4)
 }
 
 // newUniformBuffer creates a new GPU uniform buffer backed by the
 // structure uniformBlock points to.
-func newUniformBuffer(b backend.Device, uniformBlock interface{}) *uniformBuffer {
+func newUniformBuffer(b driver.Device, uniformBlock interface{}) *uniformBuffer {
 	ref := reflect.ValueOf(uniformBlock)
 	// Determine the size of the uniforms structure, *uniforms.
 	size := ref.Elem().Type().Size()
 	// Map the uniforms structure as a byte slice.
 	ptr := (*[1 << 30]byte)(unsafe.Pointer(ref.Pointer()))[:size:size]
-	ubuf, err := b.NewBuffer(backend.BufferBindingUniforms, len(ptr))
+	ubuf, err := b.NewBuffer(driver.BufferBindingUniforms, len(ptr))
 	if err != nil {
 		panic(err)
 	}
@@ -985,36 +1189,19 @@ func (u *uniformBuffer) Release() {
 	u.buf = nil
 }
 
-func newProgram(prog backend.Program, vertUniforms, fragUniforms *uniformBuffer) *program {
-	if vertUniforms != nil {
-		prog.SetVertexUniforms(vertUniforms.buf)
-	}
-	if fragUniforms != nil {
-		prog.SetFragmentUniforms(fragUniforms.buf)
-	}
-	return &program{prog: prog, vertUniforms: vertUniforms, fragUniforms: fragUniforms}
-}
-
-func (p *program) UploadUniforms() {
-	if p.vertUniforms != nil {
-		p.vertUniforms.Upload()
-	}
-	if p.fragUniforms != nil {
-		p.fragUniforms.Upload()
+func (p *pipeline) UploadUniforms(ctx driver.Device) {
+	if p.uniforms != nil {
+		p.uniforms.Upload()
+		ctx.BindUniforms(p.uniforms.buf)
 	}
 }
 
-func (p *program) Release() {
-	p.prog.Release()
-	p.prog = nil
-	if p.vertUniforms != nil {
-		p.vertUniforms.Release()
-		p.vertUniforms = nil
-	}
-	if p.fragUniforms != nil {
-		p.fragUniforms.Release()
-		p.fragUniforms = nil
+func (p *pipeline) Release() {
+	p.pipeline.Release()
+	if p.uniforms != nil {
+		p.uniforms.Release()
 	}
+	*p = pipeline{}
 }
 
 // texSpaceTransform return the scale and offset that transforms the given subimage
@@ -1026,26 +1213,42 @@ func texSpaceTransform(r f32.Rectangle, bounds image.Point) (f32.Point, f32.Poin
 	return scale, offset
 }
 
+// gradientSpaceTransform returns the transform that maps stop1 to (0,0) and stop2 to (1,0).
+func gradientSpaceTransform(clip image.Rectangle, off f32.Point, stop1, stop2 f32.Point) f32.Affine2D {
+	d := stop2.Sub(stop1)
+	l := float32(math.Sqrt(float64(d.X*d.X + d.Y*d.Y)))
+	a := float32(math.Atan2(float64(-d.Y), float64(d.X)))
+
+	// TODO: optimize
+	zp := f32.Point{}
+	return f32.Affine2D{}.
+		Scale(zp, layout.FPt(clip.Size())).            // scale to pixel space
+		Offset(zp.Sub(off).Add(layout.FPt(clip.Min))). // offset to clip space
+		Offset(zp.Sub(stop1)).                         // offset to first stop point
+		Rotate(zp, a).                                 // rotate to align gradient
+		Scale(zp, f32.Pt(1/l, 1/l))                    // scale gradient to right size
+}
+
 // clipSpaceTransform returns the scale and offset that transforms the given
-// rectangle from a viewport into OpenGL clip space.
+// rectangle from a viewport into GPU driver device coordinates.
 func clipSpaceTransform(r image.Rectangle, viewport image.Point) (f32.Point, f32.Point) {
-	// First, transform UI coordinates to OpenGL coordinates:
+	// First, transform UI coordinates to device coordinates:
 	//
-	//	[(-1, +1) (+1, +1)]
 	//	[(-1, -1) (+1, -1)]
+	//	[(-1, +1) (+1, +1)]
 	//
 	x, y := float32(r.Min.X), float32(r.Min.Y)
 	w, h := float32(r.Dx()), float32(r.Dy())
 	vx, vy := 2/float32(viewport.X), 2/float32(viewport.Y)
 	x = x*vx - 1
-	y = 1 - y*vy
+	y = y*vy - 1
 	w *= vx
 	h *= vy
 
 	// Then, compute the transformation from the fullscreen quad to
 	// the rectangle at (x, y) and dimensions (w, h).
 	scale := f32.Point{X: w * .5, Y: h * .5}
-	offset := f32.Point{X: x + w*.5, Y: y - h*.5}
+	offset := f32.Point{X: x + w*.5, Y: y + h*.5}
 
 	return scale, offset
 }
@@ -1097,29 +1300,73 @@ func (d *drawOps) writeVertCache(n int) []byte {
 }
 
 // transform, split paths as needed, calculate maxY, bounds and create GPU vertices.
-func (d *drawOps) buildVerts(aux []byte, tr f32.Affine2D) (verts []byte, bounds f32.Rectangle) {
+func (d *drawOps) buildVerts(pathData []byte, tr f32.Affine2D, outline bool, strWidth float32) (verts []byte, bounds f32.Rectangle) {
 	inf := float32(math.Inf(+1))
 	d.qs.bounds = f32.Rectangle{
 		Min: f32.Point{X: inf, Y: inf},
 		Max: f32.Point{X: -inf, Y: -inf},
 	}
 	d.qs.d = d
-	bo := binary.LittleEndian
 	startLength := len(d.vertCache)
-	for qi := 0; len(aux) >= (ops.QuadSize + 4); qi++ {
-		d.qs.contour = bo.Uint32(aux)
-		quad := ops.DecodeQuad(aux[4:])
-		quad = quad.Transform(tr)
 
-		d.qs.splitAndEncode(quad)
+	switch {
+	case strWidth > 0:
+		// Stroke path.
+		ss := stroke.StrokeStyle{
+			Width: strWidth,
+		}
+		quads := stroke.StrokePathCommands(ss, pathData)
+		for _, quad := range quads {
+			d.qs.contour = quad.Contour
+			quad.Quad = quad.Quad.Transform(tr)
+
+			d.qs.splitAndEncode(quad.Quad)
+		}
 
-		aux = aux[ops.QuadSize+4:]
+	case outline:
+		decodeToOutlineQuads(&d.qs, tr, pathData)
 	}
 
 	fillMaxY(d.vertCache[startLength:])
 	return d.vertCache[startLength:], d.qs.bounds
 }
 
+// decodeToOutlineQuads decodes scene commands, splits them into quadratic béziers
+// as needed and feeds them to the supplied splitter.
+func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) {
+	for len(pathData) >= scene.CommandSize+4 {
+		qs.contour = bo.Uint32(pathData)
+		cmd := ops.DecodeCommand(pathData[4:])
+		switch cmd.Op() {
+		case scene.OpLine:
+			var q stroke.QuadSegment
+			q.From, q.To = scene.DecodeLine(cmd)
+			q.Ctrl = q.From.Add(q.To).Mul(.5)
+			q = q.Transform(tr)
+			qs.splitAndEncode(q)
+		case scene.OpGap:
+			var q stroke.QuadSegment
+			q.From, q.To = scene.DecodeGap(cmd)
+			q.Ctrl = q.From.Add(q.To).Mul(.5)
+			q = q.Transform(tr)
+			qs.splitAndEncode(q)
+		case scene.OpQuad:
+			var q stroke.QuadSegment
+			q.From, q.Ctrl, q.To = scene.DecodeQuad(cmd)
+			q = q.Transform(tr)
+			qs.splitAndEncode(q)
+		case scene.OpCubic:
+			for _, q := range stroke.SplitCubic(scene.DecodeCubic(cmd)) {
+				q = q.Transform(tr)
+				qs.splitAndEncode(q)
+			}
+		default:
+			panic("unsupported scene command")
+		}
+		pathData = pathData[scene.CommandSize+4:]
+	}
+}
+
 // create GPU vertices for transformed r, find the bounds and establish texture transform.
 func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (aux []byte, bnd f32.Rectangle, ptr f32.Affine2D) {
 	if isPureOffset(tr) {