aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Jarry <robin@jarry.cc>2023-10-03 12:02:17 +0200
committerRobin Jarry <robin@jarry.cc>2023-10-03 22:34:39 +0200
commitbbe5e81538a537a3c65d9b94e0dbb1d4cd1604df (patch)
tree7471b9dc199672dddb1c934e8b54f77604d593fe
parenta99e0f007ead1d427465efe987294c19ed2f26f2 (diff)
downloadaerc-bbe5e81538a537a3c65d9b94e0dbb1d4cd1604df.tar.gz
aerc-bbe5e81538a537a3c65d9b94e0dbb1d4cd1604df.zip
lib: add shell command lexer
Signed-off-by: Robin Jarry <robin@jarry.cc>
-rw-r--r--lib/opt/args.go201
-rw-r--r--lib/opt/args_test.go106
-rw-r--r--lib/opt/shlex.go174
3 files changed, 481 insertions, 0 deletions
diff --git a/lib/opt/args.go b/lib/opt/args.go
new file mode 100644
index 00000000..f1a4ed2c
--- /dev/null
+++ b/lib/opt/args.go
@@ -0,0 +1,201 @@
+package opt
+
+import (
+ "errors"
+ "strings"
+)
+
+// Shell command line with interpreted arguments.
+// Allows access to individual arguments and to preserve shell quoting.
+type Args struct {
+ raw []rune
+ infos []argInfo
+}
+
+// Interpret a shell command line into multiple arguments.
+func SplitArgs(cmd string) (*Args, error) {
+ raw := []rune(cmd)
+ infos, err := parseArgs(raw)
+ if err != nil {
+ return nil, err
+ }
+ return &Args{raw: raw, infos: infos}, nil
+}
+
+// Build a shell command from multiple arguments.
+func QuoteArgs(args ...string) (*Args, error) {
+ quoted := make([]string, len(args))
+ for i, arg := range args {
+ quoted[i] = QuoteArg(arg)
+ }
+ return SplitArgs(strings.Join(quoted, " "))
+}
+
// QuoteArg wraps a single argument with appropriate quoting so that it can
// be used in a shell command.
//
// Arguments made only of ordinary characters are returned unchanged. An
// argument is wrapped in single quotes when it is empty (otherwise it would
// vanish from the command line) or when it contains whitespace, quotes, a
// backslash, a backtick or another shell meta character. Embedded single
// quotes are emitted as '"'"' since nothing can be escaped inside single
// quotes.
func QuoteArg(arg string) string {
	// Backslash and carriage return are significant for shell lexers (escape
	// character and word separator respectively) and must be protected too.
	const special = " '\"\\`|?&!#$;[](){}<>*\n\t\r"
	if arg != "" && !strings.ContainsAny(arg, special) {
		return arg
	}
	// "foo bar" --> "'foo bar'"
	// "foo'bar" --> "'foo'"'"'bar'"
	return "'" + strings.ReplaceAll(arg, "'", `'"'"'`) + "'"
}
+
+// Get the number of arguments after interpreting shell quotes.
+func (a *Args) Count() int {
+ return len(a.infos)
+}
+
+var ErrArgIndex = errors.New("argument index out of bounds")
+
+// Remove n arguments from the begining of the command line.
+// Same semantics as the `shift` built-in shell command.
+// Will fail if shifting an invalid number of arguments.
+func (a *Args) ShiftSafe(n int) ([]string, error) {
+ var shifted []string
+ switch {
+ case n == 0:
+ shifted = []string{}
+ case n >= 0 && n < len(a.infos):
+ for i := 0; i < n; i++ {
+ shifted = append(shifted, a.infos[i].unquoted)
+ }
+ a.infos = a.infos[n:]
+ start := a.infos[0].start
+ a.raw = a.raw[start:]
+ for i := range a.infos {
+ a.infos[i].start -= start
+ }
+ case n == len(a.infos):
+ for i := 0; i < n; i++ {
+ shifted = append(shifted, a.infos[i].unquoted)
+ }
+ a.raw = nil
+ a.infos = nil
+ default:
+ return nil, ErrArgIndex
+ }
+ return shifted, nil
+}
+
+// Same as ShiftSafe but cannot fail.
+func (a *Args) Shift(n int) []string {
+ if n < 0 {
+ n = 0
+ } else if n > len(a.infos) {
+ n = len(a.infos)
+ }
+ shifted, _ := a.ShiftSafe(n)
+ return shifted
+}
+
+// Remove n arguments from the end of the command line.
+// Will fail if cutting an invalid number of arguments.
+func (a *Args) CutSafe(n int) ([]string, error) {
+ var cut []string
+ switch {
+ case n == 0:
+ cut = []string{}
+ case n >= 0 && n < len(a.infos):
+ for i := len(a.infos) - n; i < len(a.infos); i++ {
+ cut = append(cut, a.infos[i].unquoted)
+ }
+ end := a.infos[len(a.infos)-n].start
+ a.infos = a.infos[:len(a.infos)-n]
+ a.raw = a.raw[:end]
+ case n == len(a.infos):
+ for i := 0; i < n; i++ {
+ cut = append(cut, a.infos[i].unquoted)
+ }
+ a.raw = nil
+ a.infos = nil
+ default:
+ return nil, ErrArgIndex
+ }
+ return cut, nil
+}
+
+// Same as CutSafe but cannot fail.
+func (a *Args) Cut(n int) []string {
+ if n < 0 {
+ n = 0
+ } else if n > len(a.infos) {
+ n = len(a.infos)
+ }
+ cut, _ := a.CutSafe(n)
+ return cut
+}
+
+// Insert the specified prefix at the begining of the command line.
+func (a *Args) Prepend(cmd string) error {
+ if !strings.HasSuffix(cmd, " ") {
+ cmd += " "
+ }
+ prefix := []rune(cmd)
+ args, err := parseArgs(prefix)
+ if err != nil {
+ return err
+ }
+ for i := range a.infos {
+ a.infos[i].start += len(prefix)
+ }
+ a.raw = append(prefix, a.raw...)
+ a.infos = append(args, a.infos...)
+ return nil
+}
+
+// Extend the command line with more arguments.
+func (a *Args) Extend(cmd string) error {
+ if !strings.HasPrefix(cmd, " ") {
+ cmd = " " + cmd
+ }
+ suffix := []rune(cmd)
+ args, err := parseArgs(suffix)
+ if err != nil {
+ return err
+ }
+ for i := range args {
+ args[i].start += len(a.raw)
+ }
+ a.raw = append(a.raw, suffix...)
+ a.infos = append(a.infos, args...)
+ return nil
+}
+
+// Get the nth argument after interpreting shell quotes.
+func (a *Args) ArgSafe(n int) (string, error) {
+ if n < 0 || n >= len(a.infos) {
+ return "", ErrArgIndex
+ }
+ return a.infos[n].unquoted, nil
+}
+
+// Get the nth argument after interpreting shell quotes.
+// Will panic if the argument index does not exist.
+func (a *Args) Arg(n int) string {
+ return a.infos[n].unquoted
+}
+
+// Get all arguments after interpreting shell quotes.
+func (a *Args) Args() []string {
+ args := make([]string, 0, len(a.infos))
+ for n := 0; n < len(a.infos); n++ {
+ args = append(args, a.infos[n].unquoted)
+ }
+ return args
+}
+
+// Get the raw command line, with uninterpreted shell quotes.
+func (a *Args) String() string {
+ if len(a.infos) == 0 {
+ return ""
+ }
+ return string(a.raw)
+}
+
+func (a *Args) Clone() *Args {
+ infos := make([]argInfo, len(a.infos))
+ copy(infos, a.infos)
+ raw := make([]rune, len(a.raw))
+ copy(raw, a.raw)
+ return &Args{raw: raw, infos: infos}
+}
diff --git a/lib/opt/args_test.go b/lib/opt/args_test.go
new file mode 100644
index 00000000..a8f4f826
--- /dev/null
+++ b/lib/opt/args_test.go
@@ -0,0 +1,106 @@
+package opt
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestSplitArgs(t *testing.T) {
+ vectors := []struct {
+ cmd string
+ args []string
+ shift int
+ shifted []string
+ shiftArgs []string
+ shiftString string
+ prepend string
+ prependedArgs []string
+ prependedString string
+ cut int
+ cutted []string
+ cutArgs []string
+ cutString string
+ extend string
+ extendedArgs []string
+ extendedString string
+ }{
+ {
+ cmd: "a b c",
+ args: []string{"a", "b", "c"},
+ shift: 0,
+ shifted: []string{},
+ shiftArgs: []string{"a", "b", "c"},
+ shiftString: "a b c",
+ prepend: "z ",
+ prependedArgs: []string{"z", "a", "b", "c"},
+ prependedString: "z a b c",
+ cut: 0,
+ cutted: []string{},
+ cutArgs: []string{"z", "a", "b", "c"},
+ cutString: "z a b c",
+ extend: " x",
+ extendedArgs: []string{"z", "a", "b", "c", "x"},
+ extendedString: "z a b c x",
+ },
+ {
+ cmd: " 'foo'\t-bar c $d | zz $bar 'x y z' ",
+ args: []string{"foo", "-bar", "c", "$d", "|", "zz", "$bar", "x y z"},
+ shift: 2,
+ shifted: []string{"foo", "-bar"},
+ shiftArgs: []string{"c", "$d", "|", "zz", "$bar", "x y z"},
+ shiftString: "c $d | zz $bar 'x y z' ",
+ prepend: `baz -p "$aeiouy noooo"`,
+ prependedArgs: []string{"baz", "-p", "$aeiouy noooo", "c", "$d", "|", "zz", "$bar", "x y z"},
+ prependedString: `baz -p "$aeiouy noooo" c $d | zz $bar 'x y z' `,
+ cut: 1,
+ cutted: []string{"x y z"},
+ cutArgs: []string{"baz", "-p", "$aeiouy noooo", "c", "$d", "|", "zz", "$bar"},
+ cutString: `baz -p "$aeiouy noooo" c $d | zz $bar `,
+ extend: "'eeeee eeee'",
+ extendedArgs: []string{"baz", "-p", "$aeiouy noooo", "c", "$d", "|", "zz", "$bar", "eeeee eeee"},
+ extendedString: `baz -p "$aeiouy noooo" c $d | zz $bar 'eeeee eeee'`,
+ },
+ {
+ cmd: `foo -xz \"bar 'baz\"' "\$baz \" ok"`,
+ args: []string{"foo", "-xz", `"bar`, `baz\"`, `$baz " ok`},
+ shift: 2,
+ shifted: []string{"foo", "-xz"},
+ shiftArgs: []string{`"bar`, `baz\"`, `$baz " ok`},
+ shiftString: `\"bar 'baz\"' "\$baz \" ok"`,
+ prepend: "find 'bleh' | xargs -uuuuu u",
+ prependedArgs: []string{"find", "bleh", "|", "xargs", "-uuuuu", "u", `"bar`, `baz\"`, `$baz " ok`},
+ prependedString: `find 'bleh' | xargs -uuuuu u \"bar 'baz\"' "\$baz \" ok"`,
+ cut: 2,
+ cutted: []string{`baz\"`, `$baz " ok`},
+ cutArgs: []string{"find", "bleh", "|", "xargs", "-uuuuu", "u", `"bar`},
+ cutString: `find 'bleh' | xargs -uuuuu u \"bar `,
+ extend: "|| rm -rf / &",
+ extendedArgs: []string{"find", "bleh", "|", "xargs", "-uuuuu", "u", `"bar`, "||", "rm", "-rf", "/", "&"},
+ extendedString: `find 'bleh' | xargs -uuuuu u \"bar || rm -rf / &`,
+ },
+ }
+ for _, vec := range vectors {
+ t.Run(vec.cmd, func(t *testing.T) {
+ cmd, err := SplitArgs(vec.cmd)
+ assert.Nil(t, err)
+ assert.Equal(t, vec.args, cmd.Args())
+ shifted := cmd.Shift(vec.shift)
+ assert.Equal(t, vec.shifted, shifted)
+ assert.Equal(t, vec.shiftArgs, cmd.Args())
+ assert.Equal(t, vec.shiftString, cmd.String())
+ err = cmd.Prepend(vec.prepend)
+ assert.Nil(t, err)
+ assert.Equal(t, vec.prependedArgs, cmd.Args())
+ assert.Equal(t, vec.prependedString, cmd.String())
+ cutted := cmd.Cut(vec.cut)
+ assert.Equal(t, vec.cutted, cutted)
+ assert.Equal(t, vec.cutArgs, cmd.Args())
+ assert.Equal(t, vec.cutString, cmd.String())
+ err = cmd.Extend(vec.extend)
+ assert.Nil(t, err)
+ assert.Equal(t, vec.extendedArgs, cmd.Args())
+ assert.Equal(t, vec.extendedString, cmd.String())
+ })
+ }
+}
diff --git a/lib/opt/shlex.go b/lib/opt/shlex.go
new file mode 100644
index 00000000..682eaf6a
--- /dev/null
+++ b/lib/opt/shlex.go
@@ -0,0 +1,174 @@
+// This code has been inspired from https://github.com/google/shlex
+//
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package opt
+
+import (
+ "fmt"
+)
+
// runeClass is the classification of a rune as seen by the lexer: space,
// quote, escape, comment marker or anything else.
type runeClass int

const (
	// otherRuneClass is the zero value: it is what a lookup in runeClasses
	// yields for any rune that has no entry in the map.
	otherRuneClass runeClass = iota
	spaceRuneClass
	escapingQuoteRuneClass
	nonEscapingQuoteRuneClass
	escapeRuneClass
	commentRuneClass
)

// runeClasses maps the runes that are significant for the lexer to their
// class. Runes that are not listed are regular word characters.
var runeClasses = map[rune]runeClass{
	' ':  spaceRuneClass,
	'\t': spaceRuneClass,
	'\r': spaceRuneClass,
	'\n': spaceRuneClass,
	'"':  escapingQuoteRuneClass,
	'\'': nonEscapingQuoteRuneClass,
	'\\': escapeRuneClass,
	'#':  commentRuneClass,
}

// lexerState is the internal state used by the lexer state machine.
type lexerState int

// Lexer state machine states.
const (
	// no runes have been seen
	startState lexerState = iota
	// processing regular runes in a word
	inWordState
	// we have just consumed an escape rune; the next rune is literal
	escapingState
	// we have just consumed an escape rune within a quoted string
	escapingQuotedState
	// we are within a quoted string that supports escaping ("...")
	quotingEscapingState
	// we are within a string that does not support escaping ('...')
	quotingState
	// we are within a comment (everything following an unquoted or
	// unescaped # at the start of a word, up to the end of the line)
	commentState
)

// Each argument info contains the start offset of the raw argument in the
// command line (including shell escapes, quotes, etc.), and its "unquoted"
// value after interpreting shell quotes and escapes.
type argInfo struct {
	start    int
	unquoted string
}

// parseArgs parses a raw command line and returns one argInfo per argument.
//
// Words are separated by unquoted whitespace. Single quotes take everything
// literally, double quotes allow backslash escapes, and a backslash outside
// of quotes makes the next rune literal. A # at the start of a word begins a
// comment that runs until the next newline. A pair of quotes with nothing in
// between ('' or "") yields an empty argument, wherever it appears.
//
// An error is returned when the input ends right after an escape character
// or inside a quoted string. In that case, the returned slice still holds
// what has been parsed so far.
func parseArgs(raw []rune) ([]argInfo, error) {
	state := startState
	args := make([]argInfo, 0)
	var unquoted []rune
	var start int

	for i, nextRune := range raw {
		class := runeClasses[nextRune]

		switch state {
		case startState: // no runes read yet
			switch class {
			case spaceRuneClass:
				// skip whitespace between words
			case commentRuneClass:
				state = commentState
			case escapingQuoteRuneClass:
				state = quotingEscapingState
			case nonEscapingQuoteRuneClass:
				state = quotingState
			case escapeRuneClass:
				state = escapingState
			default:
				// start a new word
				unquoted = []rune{nextRune}
				state = inWordState
			}
			// Updated on every rune seen in this state: when a word begins,
			// this is the offset of its first raw rune.
			start = i
		case inWordState: // in a regular word
			switch class {
			case spaceRuneClass:
				args = append(args, argInfo{
					start:    start,
					unquoted: string(unquoted),
				})
				unquoted = nil
				state = startState
			case escapingQuoteRuneClass:
				state = quotingEscapingState
			case nonEscapingQuoteRuneClass:
				state = quotingState
			case escapeRuneClass:
				state = escapingState
			default:
				// this includes #, which only starts a comment at the
				// beginning of a word
				unquoted = append(unquoted, nextRune)
			}
		case escapingState: // the rune after an escape character
			state = inWordState
			unquoted = append(unquoted, nextRune)
		case escapingQuotedState: // the next rune after an escape character, in double quotes
			state = quotingEscapingState
			unquoted = append(unquoted, nextRune)
		case quotingEscapingState: // in escaping double quotes
			switch class {
			case escapingQuoteRuneClass:
				state = inWordState
			case escapeRuneClass:
				state = escapingQuotedState
			default:
				unquoted = append(unquoted, nextRune)
			}
		case quotingState: // in non-escaping single quotes
			switch class {
			case nonEscapingQuoteRuneClass:
				state = inWordState
			default:
				unquoted = append(unquoted, nextRune)
			}
		case commentState: // in a comment
			if nextRune == '\n' {
				state = startState
			}
		default:
			return nil, fmt.Errorf("Unexpected state: %v", state)
		}
	}

	var err error
	switch state {
	case escapingState, escapingQuotedState:
		err = fmt.Errorf("EOF found after escape character")
	case quotingEscapingState:
		err = fmt.Errorf("EOF found when expecting closing double quote")
	case quotingState:
		err = fmt.Errorf("EOF found when expecting closing single quote")
	}

	// Flush the last word. It is pending when runes have been collected, or
	// when the input ends right after a closing quote: in that case no rune
	// was collected but an empty argument must still be emitted, exactly as
	// it is when the same quotes are followed by whitespace.
	if unquoted != nil || state == inWordState {
		args = append(args, argInfo{
			start:    start,
			unquoted: string(unquoted),
		})
	}

	return args, err
}