aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/unicode/bidi/bracket.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/unicode/bidi/bracket.go')
-rw-r--r--vendor/golang.org/x/text/unicode/bidi/bracket.go335
1 files changed, 335 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/bracket.go b/vendor/golang.org/x/text/unicode/bidi/bracket.go
new file mode 100644
index 0000000..1853939
--- /dev/null
+++ b/vendor/golang.org/x/text/unicode/bidi/bracket.go
@@ -0,0 +1,335 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bidi
+
+import (
+ "container/list"
+ "fmt"
+ "sort"
+)
+
+// This file contains a port of the reference implementation of the
+// Bidi Parentheses Algorithm:
+// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
+//
+// The implementation in this file covers definitions BD14-BD16 and rule N0
+// of UAX#9.
+//
+// Some preprocessing is done for each rune before data is passed to this
+// algorithm:
+// - opening and closing brackets are identified
+// - a bracket pair type, like '(' and ')' is assigned a unique identifier that
+// is identical for the opening and closing bracket. It is left to do these
+// mappings.
+// - The BPA algorithm requires that bracket characters that are canonical
+// equivalents of each other be able to be substituted for each other.
+// It is the responsibility of the caller to do this canonicalization.
+//
+// In implementing BD16, this implementation departs slightly from the "logical"
+// algorithm defined in UAX#9. In particular, the stack referenced there
+// supports operations that go beyond a "basic" stack. An equivalent
+// implementation based on a linked list is used here.
+
+// Bidi_Paired_Bracket_Type
+// BD14. An opening paired bracket is a character whose
+// Bidi_Paired_Bracket_Type property value is Open.
+//
+// BD15. A closing paired bracket is a character whose
+// Bidi_Paired_Bracket_Type property value is Close.
+type bracketType byte
+
+const (
+ bpNone bracketType = iota
+ bpOpen
+ bpClose
+)
+
+// bracketPair holds a pair of index values for opening and closing bracket
+// location of a bracket pair.
+type bracketPair struct {
+ opener int
+ closer int
+}
+
+func (b *bracketPair) String() string {
+ return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
+}
+
+// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
+type bracketPairs []bracketPair
+
+func (b bracketPairs) Len() int { return len(b) }
+func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
+func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
+
+// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
+//
+// For each rune, it takes the indexes into the original string, the class the
+// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
+// takes the direction type for the start-of-sentence and the embedding level.
+//
+// The identifiers for bracket types are the rune of the canonicalized opening
+// bracket for brackets (open or close) or 0 for runes that are not brackets.
+func resolvePairedBrackets(s *isolatingRunSequence) {
+ p := bracketPairer{
+ sos: s.sos,
+ openers: list.New(),
+ codesIsolatedRun: s.types,
+ indexes: s.indexes,
+ }
+ dirEmbed := L
+ if s.level&1 != 0 {
+ dirEmbed = R
+ }
+ p.locateBrackets(s.p.pairTypes, s.p.pairValues)
+ p.resolveBrackets(dirEmbed, s.p.initialTypes)
+}
+
+type bracketPairer struct {
+ sos Class // direction corresponding to start of sequence
+
+ // The following is a restatement of BD 16 using non-algorithmic language.
+ //
+ // A bracket pair is a pair of characters consisting of an opening
+ // paired bracket and a closing paired bracket such that the
+ // Bidi_Paired_Bracket property value of the former equals the latter,
+ // subject to the following constraints.
+ // - both characters of a pair occur in the same isolating run sequence
+ // - the closing character of a pair follows the opening character
+ // - any bracket character can belong at most to one pair, the earliest possible one
+ // - any bracket character not part of a pair is treated like an ordinary character
+ // - pairs may nest properly, but their spans may not overlap otherwise
+
+ // Bracket characters with canonical decompositions are supposed to be
+ // treated as if they had been normalized, to allow normalized and non-
+ // normalized text to give the same result. In this implementation that step
+ // is pushed out to the caller. The caller has to ensure that the pairValue
+ // slices contain the rune of the opening bracket after normalization for
+ // any opening or closing bracket.
+
+ openers *list.List // list of positions for opening brackets
+
+ // bracket pair positions sorted by location of opening bracket
+ pairPositions bracketPairs
+
+ codesIsolatedRun []Class // directional bidi codes for an isolated run
+ indexes []int // array of index values into the original string
+
+}
+
+// matchOpener reports whether characters at given positions form a matching
+// bracket pair.
+func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
+ return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
+}
+
+const maxPairingDepth = 63
+
+// locateBrackets locates matching bracket pairs according to BD16.
+//
+// This implementation uses a linked list instead of a stack, because, while
+// elements are added at the front (like a push) they are not generally removed
+// in atomic 'pop' operations, reducing the benefit of the stack archetype.
+func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
+ // traverse the run
+ // do that explicitly (not in a for-each) so we can record position
+ for i, index := range p.indexes {
+
+ // look at the bracket type for each character
+ if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
+ // continue scanning
+ continue
+ }
+ switch pairTypes[index] {
+ case bpOpen:
+ // check if maximum pairing depth reached
+ if p.openers.Len() == maxPairingDepth {
+ p.openers.Init()
+ return
+ }
+ // remember opener location, most recent first
+ p.openers.PushFront(i)
+
+ case bpClose:
+ // see if there is a match
+ count := 0
+ for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
+ count++
+ opener := elem.Value.(int)
+ if p.matchOpener(pairValues, opener, i) {
+ // if the opener matches, add nested pair to the ordered list
+ p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
+ // remove up to and including matched opener
+ for ; count > 0; count-- {
+ p.openers.Remove(p.openers.Front())
+ }
+ break
+ }
+ }
+ sort.Sort(p.pairPositions)
+ // if we get here, the closing bracket matched no openers
+ // and gets ignored
+ }
+ }
+}
+
+// Bracket pairs within an isolating run sequence are processed as units so
+// that both the opening and the closing paired bracket in a pair resolve to
+// the same direction.
+//
+// N0. Process bracket pairs in an isolating run sequence sequentially in
+// the logical order of the text positions of the opening paired brackets
+// using the logic given below. Within this scope, bidirectional types EN
+// and AN are treated as R.
+//
+// Identify the bracket pairs in the current isolating run sequence
+// according to BD16. For each bracket-pair element in the list of pairs of
+// text positions:
+//
+// a Inspect the bidirectional types of the characters enclosed within the
+// bracket pair.
+//
+// b If any strong type (either L or R) matching the embedding direction is
+// found, set the type for both brackets in the pair to match the embedding
+// direction.
+//
+// o [ e ] o -> o e e e o
+//
+// o [ o e ] -> o e o e e
+//
+// o [ NI e ] -> o e NI e e
+//
+// c Otherwise, if a strong type (opposite the embedding direction) is
+// found, test for adjacent strong types as follows: 1 First, check
+// backwards before the opening paired bracket until the first strong type
+// (L, R, or sos) is found. If that first preceding strong type is opposite
+// the embedding direction, then set the type for both brackets in the pair
+// to that type. 2 Otherwise, set the type for both brackets in the pair to
+// the embedding direction.
+//
+// o [ o ] e -> o o o o e
+//
+// o [ o NI ] o -> o o o NI o o
+//
+// e [ o ] o -> e e o e o
+//
+// e [ o ] e -> e e o e e
+//
+// e ( o [ o ] NI ) e -> e e o o o o NI e e
+//
+// d Otherwise, do not set the type for the current bracket pair. Note that
+// if the enclosed text contains no strong types the paired brackets will
+// both resolve to the same level when resolved individually using rules N1
+// and N2.
+//
+// e ( NI ) o -> e ( NI ) o
+
+// getStrongTypeN0 maps character's directional code to strong type as required
+// by rule N0.
+//
+// TODO: have separate type for "strong" directionality.
+func (p *bracketPairer) getStrongTypeN0(index int) Class {
+ switch p.codesIsolatedRun[index] {
+ // in the scope of N0, number types are treated as R
+ case EN, AN, AL, R:
+ return R
+ case L:
+ return L
+ default:
+ return ON
+ }
+}
+
+// classifyPairContent reports the strong types contained inside a Bracket Pair,
+// assuming the given embedding direction.
+//
+// It returns ON if no strong type is found. If a single strong type is found,
+// it returns this type. Otherwise it returns the embedding direction.
+//
+// TODO: use separate type for "strong" directionality.
+func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
+ dirOpposite := ON
+ for i := loc.opener + 1; i < loc.closer; i++ {
+ dir := p.getStrongTypeN0(i)
+ if dir == ON {
+ continue
+ }
+ if dir == dirEmbed {
+ return dir // type matching embedding direction found
+ }
+ dirOpposite = dir
+ }
+ // return ON if no strong type found, or class opposite to dirEmbed
+ return dirOpposite
+}
+
+// classBeforePair determines which strong types are present before a Bracket
+// Pair. Return R or L if strong type found, otherwise ON.
+func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
+ for i := loc.opener - 1; i >= 0; i-- {
+ if dir := p.getStrongTypeN0(i); dir != ON {
+ return dir
+ }
+ }
+ // no strong types found, return sos
+ return p.sos
+}
+
+// assignBracketType implements rule N0 for a single bracket pair.
+func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
+ // rule "N0, a", inspect contents of pair
+ dirPair := p.classifyPairContent(loc, dirEmbed)
+
+ // dirPair is now L, R, or N (no strong type found)
+
+ // the following logical tests are performed out of order compared to
+ // the statement of the rules but yield the same results
+ if dirPair == ON {
+ return // case "d" - nothing to do
+ }
+
+ if dirPair != dirEmbed {
+ // case "c": strong type found, opposite - check before (c.1)
+ dirPair = p.classBeforePair(loc)
+ if dirPair == dirEmbed || dirPair == ON {
+ // no strong opposite type found before - use embedding (c.2)
+ dirPair = dirEmbed
+ }
+ }
+ // else: case "b", strong type found matching embedding,
+ // no explicit action needed, as dirPair is already set to embedding
+ // direction
+
+ // set the bracket types to the type found
+ p.setBracketsToType(loc, dirPair, initialTypes)
+}
+
+func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
+ p.codesIsolatedRun[loc.opener] = dirPair
+ p.codesIsolatedRun[loc.closer] = dirPair
+
+ for i := loc.opener + 1; i < loc.closer; i++ {
+ index := p.indexes[i]
+ if initialTypes[index] != NSM {
+ break
+ }
+ p.codesIsolatedRun[i] = dirPair
+ }
+
+ for i := loc.closer + 1; i < len(p.indexes); i++ {
+ index := p.indexes[i]
+ if initialTypes[index] != NSM {
+ break
+ }
+ p.codesIsolatedRun[i] = dirPair
+ }
+}
+
+// resolveBrackets implements rule N0 for a list of pairs.
+func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
+ for _, loc := range p.pairPositions {
+ p.assignBracketType(loc, dirEmbed, initialTypes)
+ }
+}