diff options
author | Robin Jarry <robin@jarry.cc> | 2023-10-03 12:02:17 +0200 |
---|---|---|
committer | Robin Jarry <robin@jarry.cc> | 2023-10-03 22:34:39 +0200 |
commit | bbe5e81538a537a3c65d9b94e0dbb1d4cd1604df (patch) | |
tree | 7471b9dc199672dddb1c934e8b54f77604d593fe | |
parent | a99e0f007ead1d427465efe987294c19ed2f26f2 (diff) | |
download | aerc-bbe5e81538a537a3c65d9b94e0dbb1d4cd1604df.tar.gz aerc-bbe5e81538a537a3c65d9b94e0dbb1d4cd1604df.zip |
lib: add shell command lexer
Signed-off-by: Robin Jarry <robin@jarry.cc>
-rw-r--r-- | lib/opt/args.go | 201 | ||||
-rw-r--r-- | lib/opt/args_test.go | 106 | ||||
-rw-r--r-- | lib/opt/shlex.go | 174 |
3 files changed, 481 insertions, 0 deletions
// ---------------------------------------------------------------------------
// diff --git a/lib/opt/args.go b/lib/opt/args.go
// new file mode 100644
// index 00000000..f1a4ed2c
// --- /dev/null
// +++ b/lib/opt/args.go
// @@ -0,0 +1,201 @@
//
// package opt
// imports: "errors", "strings"
// ---------------------------------------------------------------------------

// Args is a shell command line with interpreted arguments.
// It allows access to individual arguments while preserving the shell
// quoting of the raw command line.
type Args struct {
	// raw is the command line, shell quotes and escapes included.
	raw []rune
	// infos holds one entry per argument, in order of appearance in raw.
	infos []argInfo
}

// SplitArgs interprets a shell command line into multiple arguments.
// It returns an error when cmd ends inside a quoted string or right after an
// escape character.
func SplitArgs(cmd string) (*Args, error) {
	raw := []rune(cmd)
	infos, err := parseArgs(raw)
	if err != nil {
		return nil, err
	}
	return &Args{raw: raw, infos: infos}, nil
}

// QuoteArgs builds a shell command from multiple arguments. Every argument is
// passed through QuoteArg, joined with single spaces and parsed back.
func QuoteArgs(args ...string) (*Args, error) {
	quoted := make([]string, len(args))
	for i, arg := range args {
		quoted[i] = QuoteArg(arg)
	}
	return SplitArgs(strings.Join(quoted, " "))
}

// argQuoteChars lists the characters that force QuoteArg to quote its
// argument. Besides the usual shell meta characters it contains every rune
// that the lexer of this package handles specially: whitespace (including
// carriage return), both kinds of quotes, the backslash and '#'.
const argQuoteChars = " \t\r\n'\"\\`|?&!#$;[](){}<>*"

// QuoteArg wraps a single argument with appropriate quoting so that it can be
// used in a shell command.
//
// The argument is returned unchanged when it needs no quoting. Otherwise it is
// enclosed in single quotes, embedded single quotes being spelled '"'"'. An
// empty argument is returned as '' so that it does not vanish once joined
// with other arguments.
func QuoteArg(arg string) string {
	if arg == "" {
		return "''"
	}
	if strings.ContainsAny(arg, argQuoteChars) {
		return "'" + strings.ReplaceAll(arg, "'", `'"'"'`) + "'"
	}
	return arg
}

// Count returns the number of arguments after interpreting shell quotes.
func (a *Args) Count() int {
	return len(a.infos)
}

// ErrArgIndex is returned when an argument index or count is out of bounds.
var ErrArgIndex = errors.New("argument index out of bounds")

// ShiftSafe removes n arguments from the beginning of the command line and
// returns their unquoted values.
// Same semantics as the `shift` built-in shell command.
// Will fail with ErrArgIndex if n is negative or larger than Count().
func (a *Args) ShiftSafe(n int) ([]string, error) {
	switch {
	case n == 0:
		// Nothing to remove: the raw command line is left untouched,
		// leading white space included.
		return []string{}, nil
	case n < 0 || n > len(a.infos):
		return nil, ErrArgIndex
	}

	shifted := make([]string, 0, n)
	for _, info := range a.infos[:n] {
		shifted = append(shifted, info.unquoted)
	}

	if n == len(a.infos) {
		// Everything was consumed.
		a.raw = nil
		a.infos = nil
		return shifted, nil
	}

	a.infos = a.infos[n:]
	// The raw command line now begins at the first remaining argument;
	// rebase the offsets of all remaining arguments accordingly.
	start := a.infos[0].start
	a.raw = a.raw[start:]
	for i := range a.infos {
		a.infos[i].start -= start
	}
	return shifted, nil
}

// Shift is the same as ShiftSafe but cannot fail: n is clamped to the range
// [0, Count()].
func (a *Args) Shift(n int) []string {
	if n < 0 {
		n = 0
	} else if n > len(a.infos) {
		n = len(a.infos)
	}
	// The error can be ignored: n has just been clamped to a valid value.
	shifted, _ := a.ShiftSafe(n)
	return shifted
}

// CutSafe removes n arguments from the end of the command line and returns
// their unquoted values, in command line order.
// Will fail with ErrArgIndex if n is negative or larger than Count().
func (a *Args) CutSafe(n int) ([]string, error) {
	switch {
	case n == 0:
		// Nothing to remove: the raw command line is left untouched.
		return []string{}, nil
	case n < 0 || n > len(a.infos):
		return nil, ErrArgIndex
	}

	first := len(a.infos) - n // index of the first removed argument
	cut := make([]string, 0, n)
	for _, info := range a.infos[first:] {
		cut = append(cut, info.unquoted)
	}

	if first == 0 {
		// Everything was consumed.
		a.raw = nil
		a.infos = nil
		return cut, nil
	}

	// Truncate the raw command line right where the first removed argument
	// started. White space that separated it from the previous argument is
	// kept.
	end := a.infos[first].start
	a.infos = a.infos[:first]
	a.raw = a.raw[:end]
	return cut, nil
}

// Cut is the same as CutSafe but cannot fail: n is clamped to the range
// [0, Count()].
func (a *Args) Cut(n int) []string {
	if n < 0 {
		n = 0
	} else if n > len(a.infos) {
		n = len(a.infos)
	}
	// The error can be ignored: n has just been clamped to a valid value.
	cut, _ := a.CutSafe(n)
	return cut
}

// Prepend inserts the specified prefix at the beginning of the command line.
// A space is appended to cmd when it does not already end with one, so that
// the prefix does not fuse with the first existing argument.
// The command line is left unmodified when cmd cannot be parsed.
func (a *Args) Prepend(cmd string) error {
	if !strings.HasSuffix(cmd, " ") {
		cmd += " "
	}
	prefix := []rune(cmd)
	args, err := parseArgs(prefix)
	if err != nil {
		return err
	}
	// Existing arguments move to the right by the length of the prefix.
	for i := range a.infos {
		a.infos[i].start += len(prefix)
	}
	a.raw = append(prefix, a.raw...)
	a.infos = append(args, a.infos...)
	return nil
}

// Extend extends the command line with more arguments.
// A space is inserted in front of cmd when it does not already start with
// one, so that it does not fuse with the last existing argument.
// The command line is left unmodified when cmd cannot be parsed.
func (a *Args) Extend(cmd string) error {
	if !strings.HasPrefix(cmd, " ") {
		cmd = " " + cmd
	}
	suffix := []rune(cmd)
	args, err := parseArgs(suffix)
	if err != nil {
		return err
	}
	// Offsets of the new arguments are relative to suffix; make them
	// relative to the whole command line.
	for i := range args {
		args[i].start += len(a.raw)
	}
	a.raw = append(a.raw, suffix...)
	a.infos = append(a.infos, args...)
	return nil
}

// ArgSafe returns the nth argument after interpreting shell quotes.
// Will fail with ErrArgIndex if the argument index does not exist.
func (a *Args) ArgSafe(n int) (string, error) {
	if n < 0 || n >= len(a.infos) {
		return "", ErrArgIndex
	}
	return a.infos[n].unquoted, nil
}

// Arg returns the nth argument after interpreting shell quotes.
// Will panic if the argument index does not exist.
func (a *Args) Arg(n int) string {
	return a.infos[n].unquoted
}

// Args returns all arguments after interpreting shell quotes.
// The returned slice is never nil.
func (a *Args) Args() []string {
	args := make([]string, 0, len(a.infos))
	for _, info := range a.infos {
		args = append(args, info.unquoted)
	}
	return args
}

// String returns the raw command line, with uninterpreted shell quotes.
// It returns an empty string when there are no arguments, even if the raw
// command line still holds white space or a comment.
func (a *Args) String() string {
	if len(a.infos) == 0 {
		return ""
	}
	return string(a.raw)
}

// Clone returns a copy of a that shares no memory with it.
func (a *Args) Clone() *Args {
	infos := make([]argInfo, len(a.infos))
	copy(infos, a.infos)
	raw := make([]rune, len(a.raw))
	copy(raw, a.raw)
	return &Args{raw: raw, infos: infos}
}

// ---------------------------------------------------------------------------
// diff --git a/lib/opt/args_test.go b/lib/opt/args_test.go
// new file mode 100644
// index 00000000..a8f4f826
// --- /dev/null
// +++ b/lib/opt/args_test.go
// @@ -0,0 +1,106 @@
//
// package opt
// imports: "reflect", "testing"
// ---------------------------------------------------------------------------

// checkEqual reports a test error when got is not deeply equal to want.
// Like a non fatal assertion, it lets the test carry on after a mismatch.
func checkEqual(t *testing.T, what string, want, got interface{}) {
	t.Helper()
	if !reflect.DeepEqual(want, got) {
		t.Errorf("%s: expected %#v, got %#v", what, want, got)
	}
}

// TestSplitArgs runs every vector through the same pipeline: split, shift,
// prepend, cut, extend. After each step it checks both the interpreted
// arguments and the raw command line.
func TestSplitArgs(t *testing.T) {
	vectors := []struct {
		cmd             string
		args            []string
		shift           int
		shifted         []string
		shiftArgs       []string
		shiftString     string
		prepend         string
		prependedArgs   []string
		prependedString string
		cut             int
		cutted          []string
		cutArgs         []string
		cutString       string
		extend          string
		extendedArgs    []string
		extendedString  string
	}{
		{
			cmd:             "a b c",
			args:            []string{"a", "b", "c"},
			shift:           0,
			shifted:         []string{},
			shiftArgs:       []string{"a", "b", "c"},
			shiftString:     "a b c",
			prepend:         "z ",
			prependedArgs:   []string{"z", "a", "b", "c"},
			prependedString: "z a b c",
			cut:             0,
			cutted:          []string{},
			cutArgs:         []string{"z", "a", "b", "c"},
			cutString:       "z a b c",
			extend:          " x",
			extendedArgs:    []string{"z", "a", "b", "c", "x"},
			extendedString:  "z a b c x",
		},
		{
			cmd:             " 'foo'\t-bar c $d | zz $bar 'x y z' ",
			args:            []string{"foo", "-bar", "c", "$d", "|", "zz", "$bar", "x y z"},
			shift:           2,
			shifted:         []string{"foo", "-bar"},
			shiftArgs:       []string{"c", "$d", "|", "zz", "$bar", "x y z"},
			shiftString:     "c $d | zz $bar 'x y z' ",
			prepend:         `baz -p "$aeiouy noooo"`,
			prependedArgs:   []string{"baz", "-p", "$aeiouy noooo", "c", "$d", "|", "zz", "$bar", "x y z"},
			prependedString: `baz -p "$aeiouy noooo" c $d | zz $bar 'x y z' `,
			cut:             1,
			cutted:          []string{"x y z"},
			cutArgs:         []string{"baz", "-p", "$aeiouy noooo", "c", "$d", "|", "zz", "$bar"},
			cutString:       `baz -p "$aeiouy noooo" c $d | zz $bar `,
			extend:          "'eeeee eeee'",
			extendedArgs:    []string{"baz", "-p", "$aeiouy noooo", "c", "$d", "|", "zz", "$bar", "eeeee eeee"},
			// Two spaces before the new argument: the cut command line
			// keeps its trailing space and Extend inserts a separator.
			extendedString: `baz -p "$aeiouy noooo" c $d | zz $bar  'eeeee eeee'`,
		},
		{
			cmd:             `foo -xz \"bar 'baz\"' "\$baz \" ok"`,
			args:            []string{"foo", "-xz", `"bar`, `baz\"`, `$baz " ok`},
			shift:           2,
			shifted:         []string{"foo", "-xz"},
			shiftArgs:       []string{`"bar`, `baz\"`, `$baz " ok`},
			shiftString:     `\"bar 'baz\"' "\$baz \" ok"`,
			prepend:         "find 'bleh' | xargs -uuuuu u",
			prependedArgs:   []string{"find", "bleh", "|", "xargs", "-uuuuu", "u", `"bar`, `baz\"`, `$baz " ok`},
			prependedString: `find 'bleh' | xargs -uuuuu u \"bar 'baz\"' "\$baz \" ok"`,
			cut:             2,
			cutted:          []string{`baz\"`, `$baz " ok`},
			cutArgs:         []string{"find", "bleh", "|", "xargs", "-uuuuu", "u", `"bar`},
			cutString:       `find 'bleh' | xargs -uuuuu u \"bar `,
			extend:          "|| rm -rf / &",
			extendedArgs:    []string{"find", "bleh", "|", "xargs", "-uuuuu", "u", `"bar`, "||", "rm", "-rf", "/", "&"},
			// Two spaces before "||", for the same reason as above.
			extendedString: `find 'bleh' | xargs -uuuuu u \"bar  || rm -rf / &`,
		},
	}
	for _, vec := range vectors {
		vec := vec // each sub test closure gets its own copy
		t.Run(vec.cmd, func(t *testing.T) {
			cmd, err := SplitArgs(vec.cmd)
			if err != nil {
				// cmd is nil, nothing else can be checked.
				t.Fatalf("SplitArgs: unexpected error: %v", err)
			}
			checkEqual(t, "args", vec.args, cmd.Args())

			shifted := cmd.Shift(vec.shift)
			checkEqual(t, "shifted", vec.shifted, shifted)
			checkEqual(t, "shiftArgs", vec.shiftArgs, cmd.Args())
			checkEqual(t, "shiftString", vec.shiftString, cmd.String())

			if err := cmd.Prepend(vec.prepend); err != nil {
				t.Errorf("Prepend: unexpected error: %v", err)
			}
			checkEqual(t, "prependedArgs", vec.prependedArgs, cmd.Args())
			checkEqual(t, "prependedString", vec.prependedString, cmd.String())

			cutted := cmd.Cut(vec.cut)
			checkEqual(t, "cutted", vec.cutted, cutted)
			checkEqual(t, "cutArgs", vec.cutArgs, cmd.Args())
			checkEqual(t, "cutString", vec.cutString, cmd.String())

			if err := cmd.Extend(vec.extend); err != nil {
				t.Errorf("Extend: unexpected error: %v", err)
			}
			checkEqual(t, "extendedArgs", vec.extendedArgs, cmd.Args())
			checkEqual(t, "extendedString", vec.extendedString, cmd.String())
		})
	}
}

// ---------------------------------------------------------------------------
// diff --git a/lib/opt/shlex.go b/lib/opt/shlex.go
// new file mode 100644
// index 00000000..682eaf6a
// --- /dev/null
// +++ b/lib/opt/shlex.go
// @@ -0,0 +1,174 @@
// ---------------------------------------------------------------------------

// This code has been inspired from https://github.com/google/shlex
//
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// package opt
// imports: "fmt"

// runeClass is the type of a UTF-8 character classification: A quote, space,
// escape.
type runeClass int

const (
	// any rune that has no entry in runeClasses
	otherRuneClass runeClass = iota
	spaceRuneClass
	escapingQuoteRuneClass
	nonEscapingQuoteRuneClass
	escapeRuneClass
	commentRuneClass
)

// runeClasses maps the runes that the lexer treats specially to their class.
// Looking up any other rune yields the zero value, otherRuneClass.
var runeClasses = map[rune]runeClass{
	' ':  spaceRuneClass,
	'\t': spaceRuneClass,
	'\r': spaceRuneClass,
	'\n': spaceRuneClass,
	'"':  escapingQuoteRuneClass,
	'\'': nonEscapingQuoteRuneClass,
	'\\': escapeRuneClass,
	'#':  commentRuneClass,
}

// the internal state used by the lexer state machine
type lexerState int

// Lexer state machine states
const (
	// no runes have been seen
	startState lexerState = iota
	// processing regular runes in a word
	inWordState
	// we have just consumed an escape rune; the next rune is literal
	escapingState
	// we have just consumed an escape rune within a quoted string
	escapingQuotedState
	// we are within a quoted string that supports escaping ("...")
	quotingEscapingState
	// we are within a string that does not support escaping ('...')
	quotingState
	// we are within a comment (everything following an unquoted or
	// unescaped #)
	commentState
)

// Each argument info contains the start offset of the raw argument in the
// command line (including shell escapes, quotes, etc.), and its "unquoted"
// value after interpreting shell quotes and escapes.
type argInfo struct {
	start    int
	unquoted string
}

// Parse a raw command line and return a list of argument info structs.
//
// The returned slice is never nil. A non-nil error is returned when raw ends
// right after an escape character or inside a quoted string; the arguments
// collected so far are returned along with it.
func parseArgs(raw []rune) ([]argInfo, error) {
	state := startState
	args := make([]argInfo, 0)
	var unquoted []rune
	var start int

	for i, nextRune := range raw {
		class := runeClasses[nextRune]

		switch state {
		case startState: // no runes read yet
			switch class {
			case spaceRuneClass:
				// skip white space between arguments
			case commentRuneClass:
				state = commentState
			case escapingQuoteRuneClass:
				state = quotingEscapingState
			case nonEscapingQuoteRuneClass:
				state = quotingState
			case escapeRuneClass:
				state = escapingState
			default:
				// start a new word
				unquoted = []rune{nextRune}
				state = inWordState
			}
			// As long as no word has begun, keep moving the start
			// offset. It ends up on the first rune of the next
			// argument, opening quote or escape included.
			start = i
		case inWordState: // in a regular word
			switch class {
			case spaceRuneClass:
				args = append(args, argInfo{
					start:    start,
					unquoted: string(unquoted),
				})
				unquoted = nil
				state = startState
			case escapingQuoteRuneClass:
				state = quotingEscapingState
			case nonEscapingQuoteRuneClass:
				state = quotingState
			case escapeRuneClass:
				state = escapingState
			default:
				// includes '#', which only starts a comment at
				// the beginning of a word
				unquoted = append(unquoted, nextRune)
			}
		case escapingState: // the rune after an escape character
			state = inWordState
			unquoted = append(unquoted, nextRune)
		case escapingQuotedState: // the next rune after an escape character, in double quotes
			state = quotingEscapingState
			unquoted = append(unquoted, nextRune)
		case quotingEscapingState: // in escaping double quotes
			switch class {
			case escapingQuoteRuneClass:
				state = inWordState
			case escapeRuneClass:
				state = escapingQuotedState
			default:
				unquoted = append(unquoted, nextRune)
			}
		case quotingState: // in non-escaping single quotes
			switch class {
			case nonEscapingQuoteRuneClass:
				state = inWordState
			default:
				unquoted = append(unquoted, nextRune)
			}
		case commentState: // in a comment
			if nextRune == '\n' {
				state = startState
			}
		default:
			return nil, fmt.Errorf("Unexpected state: %v", state)
		}
	}

	var err error
	switch state {
	case escapingState:
		err = fmt.Errorf("EOF found after escape character")
	case escapingQuotedState:
		err = fmt.Errorf("EOF found after escape character")
	case quotingEscapingState:
		err = fmt.Errorf("EOF found when expecting closing double quote")
	case quotingState:
		err = fmt.Errorf("EOF found when expecting closing single quote")
	}

	// Flush the last argument. A word made only of empty quotes ('' or "")
	// leaves unquoted nil while the lexer is in inWordState. It is a valid
	// empty argument and must be recorded, exactly as it is when followed
	// by white space.
	if unquoted != nil || state == inWordState {
		args = append(args, argInfo{
			start:    start,
			unquoted: string(unquoted),
		})
	}

	return args, err
}