Skip to content

Commit b7bac2f

Browse files
authored
feat(bigquery): add PreserveAsciiControlCharacters support for CSV (#6448)
* feat(bigquery): add PreserveAsciiControlCharacters support for CSV
1 parent 74da335 commit b7bac2f

File tree

3 files changed

+48
-37
lines changed

3 files changed

+48
-37
lines changed

bigquery/external.go

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -230,17 +230,22 @@ type CSVOptions struct {
230230
// An optional custom string that will represent a NULL
231231
// value in CSV import data.
232232
NullMarker string
233+
234+
// Preserves the embedded ASCII control characters (the first 32 characters in the ASCII-table,
235+
// from '\x00' to '\x1F') when loading from CSV. Only applicable to CSV, ignored for other formats.
236+
PreserveASCIIControlCharacters bool
233237
}
234238

235239
func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
236240
c.CsvOptions = &bq.CsvOptions{
237-
AllowJaggedRows: o.AllowJaggedRows,
238-
AllowQuotedNewlines: o.AllowQuotedNewlines,
239-
Encoding: string(o.Encoding),
240-
FieldDelimiter: o.FieldDelimiter,
241-
Quote: o.quote(),
242-
SkipLeadingRows: o.SkipLeadingRows,
243-
NullMarker: o.NullMarker,
241+
AllowJaggedRows: o.AllowJaggedRows,
242+
AllowQuotedNewlines: o.AllowQuotedNewlines,
243+
Encoding: string(o.Encoding),
244+
FieldDelimiter: o.FieldDelimiter,
245+
Quote: o.quote(),
246+
SkipLeadingRows: o.SkipLeadingRows,
247+
NullMarker: o.NullMarker,
248+
PreserveAsciiControlCharacters: o.PreserveASCIIControlCharacters,
244249
}
245250
}
246251

@@ -267,12 +272,13 @@ func (o *CSVOptions) setQuote(ps *string) {
267272

268273
func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions {
269274
o := &CSVOptions{
270-
AllowJaggedRows: q.AllowJaggedRows,
271-
AllowQuotedNewlines: q.AllowQuotedNewlines,
272-
Encoding: Encoding(q.Encoding),
273-
FieldDelimiter: q.FieldDelimiter,
274-
SkipLeadingRows: q.SkipLeadingRows,
275-
NullMarker: q.NullMarker,
275+
AllowJaggedRows: q.AllowJaggedRows,
276+
AllowQuotedNewlines: q.AllowQuotedNewlines,
277+
Encoding: Encoding(q.Encoding),
278+
FieldDelimiter: q.FieldDelimiter,
279+
SkipLeadingRows: q.SkipLeadingRows,
280+
NullMarker: q.NullMarker,
281+
PreserveASCIIControlCharacters: q.PreserveAsciiControlCharacters,
276282
}
277283
o.setQuote(q.Quote)
278284
return o

bigquery/file.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
9393
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
9494
conf.MaxBadRecords = fc.MaxBadRecords
9595
conf.NullMarker = fc.NullMarker
96+
conf.PreserveAsciiControlCharacters = fc.PreserveASCIIControlCharacters
9697
if fc.Schema != nil {
9798
conf.Schema = fc.Schema.toBQ()
9899
}
@@ -120,6 +121,7 @@ func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
120121
fc.Encoding = Encoding(conf.Encoding)
121122
fc.FieldDelimiter = conf.FieldDelimiter
122123
fc.CSVOptions.NullMarker = conf.NullMarker
124+
fc.CSVOptions.PreserveASCIIControlCharacters = conf.PreserveAsciiControlCharacters
123125
fc.CSVOptions.setQuote(conf.Quote)
124126
}
125127

bigquery/file_test.go

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ var (
3333
nestedFieldSchema(),
3434
},
3535
CSVOptions: CSVOptions{
36-
Quote: hyphen,
37-
FieldDelimiter: "\t",
38-
SkipLeadingRows: 8,
39-
AllowJaggedRows: true,
40-
AllowQuotedNewlines: true,
41-
Encoding: UTF_8,
42-
NullMarker: "marker",
36+
Quote: hyphen,
37+
FieldDelimiter: "\t",
38+
SkipLeadingRows: 8,
39+
AllowJaggedRows: true,
40+
AllowQuotedNewlines: true,
41+
Encoding: UTF_8,
42+
NullMarker: "marker",
43+
PreserveASCIIControlCharacters: true,
4344
},
4445
}
4546
)
@@ -63,16 +64,17 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) {
6364
description: "csv",
6465
fileConfig: &fc,
6566
want: &bq.JobConfigurationLoad{
66-
SourceFormat: "CSV",
67-
FieldDelimiter: "\t",
68-
SkipLeadingRows: 8,
69-
AllowJaggedRows: true,
70-
AllowQuotedNewlines: true,
71-
Autodetect: true,
72-
Encoding: "UTF-8",
73-
MaxBadRecords: 7,
74-
IgnoreUnknownValues: true,
75-
NullMarker: "marker",
67+
SourceFormat: "CSV",
68+
FieldDelimiter: "\t",
69+
SkipLeadingRows: 8,
70+
AllowJaggedRows: true,
71+
AllowQuotedNewlines: true,
72+
Autodetect: true,
73+
Encoding: "UTF-8",
74+
MaxBadRecords: 7,
75+
IgnoreUnknownValues: true,
76+
NullMarker: "marker",
77+
PreserveAsciiControlCharacters: true,
7678
Schema: &bq.TableSchema{
7779
Fields: []*bq.TableFieldSchema{
7880
bqStringFieldSchema(),
@@ -150,13 +152,14 @@ func TestFileConfigPopulateExternalDataConfig(t *testing.T) {
150152
bqNestedFieldSchema(),
151153
}},
152154
CsvOptions: &bq.CsvOptions{
153-
AllowJaggedRows: true,
154-
AllowQuotedNewlines: true,
155-
Encoding: "UTF-8",
156-
FieldDelimiter: "\t",
157-
Quote: &hyphen,
158-
SkipLeadingRows: 8,
159-
NullMarker: "marker",
155+
AllowJaggedRows: true,
156+
AllowQuotedNewlines: true,
157+
Encoding: "UTF-8",
158+
FieldDelimiter: "\t",
159+
Quote: &hyphen,
160+
SkipLeadingRows: 8,
161+
NullMarker: "marker",
162+
PreserveAsciiControlCharacters: true,
160163
},
161164
},
162165
},

0 commit comments

Comments
 (0)