encoding/csv: report line start line in errors

Errors returned by Reader contain the line where the Reader originally encountered the error. This can be suboptimal since that line does not always correspond with the line the current record/field started at. This can easily happen with LazyQuotes as seen in #19019, but also happens for example when a quoted fields has no closing quote and the parser hits EOF before it finds another quote. When this happens finding the erroneous field can be somewhat complicated and time consuming, and in most cases it would be better to report the line where the record started. This change updates Reader to keep track of the line on which a record begins and uses it for errors instead of the current line, making it easier to find errors. Although a user-visible change, this should have no impact on existing code, since most users don't explicitly work with the line in the error and probably already expect the new behaviour. Updates #19019 Change-Id: Ic9bc70fad2651c69435d614d537e7a9266819b05 Reviewed-on: https://go-review.googlesource.com/52830 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
author: Justin Nuß <nuss.justin@gmail.com> 2017-08-02 19:18:30 +0200
committer: Ian Lance Taylor <iant@golang.org> 2017-08-14 04:45:38 +0000
commit: 5d14ac74f614631a38be95cc9724ff38068c6c76 (patch)
tree: dec1fcac0d9ff56992966bdf4c0f68f154579843 /src/encoding/csv
parent: 342d25fc05a915f267e9d4f0bdc13fb04e014a2b (diff)
download: go-5d14ac74f614631a38be95cc9724ff38068c6c76.tar.gz
go-5d14ac74f614631a38be95cc9724ff38068c6c76.zip
2 files changed, 21 insertions, 4 deletions
diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go
index a3497c84f9..4085321a71 100644
--- a/src/encoding/csv/reader.go
+++ b/src/encoding/csv/reader.go
@@ -115,9 +115,10 @@ type Reader struct {
 	// By default, each call to Read returns newly allocated memory owned by the caller.
 	ReuseRecord bool
 
-	line   int
-	column int
-	r      *bufio.Reader
+	line       int
+	recordLine int // line where the current record started
+	column     int
+	r          *bufio.Reader
 	// lineBuffer holds the unescaped fields read by readField, one after another.
 	// The fields can be accessed by using the indexes in fieldIndexes.
 	// Example: for the row `a,"b","c""d",e` lineBuffer will contain `abc"de` and
@@ -142,7 +143,7 @@ func NewReader(r io.Reader) *Reader {
 // error creates a new ParseError based on err.
 func (r *Reader) error(err error) error {
 	return &ParseError{
-		Line:   r.line,
+		Line:   r.recordLine,
 		Column: r.column,
 		Err:    err,
 	}
@@ -251,7 +252,9 @@ func (r *Reader) parseRecord(dst []string) (fields []string, err error) {
 	// Each record starts on a new line. We increment our line
 	// number (lines start at 1, not 0) and set column to -1
 	// so as we increment in readRune it points to the character we read.
+	// We track the line where the record starts in recordLine for use in errors.
 	r.line++
+	r.recordLine = r.line
 	r.column = -1
 
 	// Peek at the first rune. If it is an error we are done.
diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go
index 5ab1b61256..76e94bab3e 100644
--- a/src/encoding/csv/reader_test.go
+++ b/src/encoding/csv/reader_test.go
@@ -270,6 +270,20 @@ x,,,
 			{"c", "d"},
 		},
 	},
+	{ // issue 19019
+		Name:   "RecordLine1",
+		Input:  "a,\"b\nc\"d,e",
+		Error:  `extraneous " in field`,
+		Line:   1,
+		Column: 1,
+	},
+	{
+		Name:   "RecordLine2",
+		Input:  "a,b\n\"d\n\n,e",
+		Error:  `extraneous " in field`,
+		Line:   2,
+		Column: 2,
+	},
 }
 
 func TestRead(t *testing.T) {
author	Justin Nuß <nuss.justin@gmail.com>	2017-08-02 19:18:30 +0200
committer	Ian Lance Taylor <iant@golang.org>	2017-08-14 04:45:38 +0000
commit	5d14ac74f614631a38be95cc9724ff38068c6c76 (patch)
tree	dec1fcac0d9ff56992966bdf4c0f68f154579843 /src/encoding/csv
parent	342d25fc05a915f267e9d4f0bdc13fb04e014a2b (diff)
download	go-5d14ac74f614631a38be95cc9724ff38068c6c76.tar.gz go-5d14ac74f614631a38be95cc9724ff38068c6c76.zip