From ecf6fbd1874c2f26dd2d67271423e9f432f9d4d9 Mon Sep 17 00:00:00 2001 From: MaysWind Date: Fri, 8 Aug 2025 20:11:31 +0800 Subject: [PATCH] support setting whether the data table in csv / xls / xlsx files contains a header row --- ...ipay_transaction_data_csv_file_importer.go | 2 +- .../csv/csv_file_basic_data_table.go | 37 ++- .../csv/csv_file_basic_data_table_test.go | 88 +++++- ...stom_transaction_data_dsv_file_importer.go | 6 +- .../excel_mscfb_file_basic_data_table.go | 78 ++++-- .../excel_mscfb_file_basic_data_table_test.go | 264 ++++++++++++++++-- .../excel_ooxml_file_basic_data_table.go | 60 +++- .../excel_ooxml_file_basic_data_table_test.go | 248 +++++++++++++++- ..._app_transaction_data_csv_file_importer.go | 2 +- ...oud_transaction_data_xlsx_file_importer.go | 2 +- ..._web_transaction_data_xls_file_importer.go | 2 +- ...yiii_transaction_data_csv_file_importer.go | 2 +- ..._pay_transaction_data_csv_file_importer.go | 2 +- 13 files changed, 688 insertions(+), 105 deletions(-) diff --git a/pkg/converters/alipay/alipay_transaction_data_csv_file_importer.go b/pkg/converters/alipay/alipay_transaction_data_csv_file_importer.go index 1ab5d1e7..44c41d7c 100644 --- a/pkg/converters/alipay/alipay_transaction_data_csv_file_importer.go +++ b/pkg/converters/alipay/alipay_transaction_data_csv_file_importer.go @@ -156,7 +156,7 @@ func (c *alipayTransactionDataCsvFileImporter) createNewAlipayBasicDataTable(ctx return nil, errs.ErrNotFoundTransactionDataInFile } - dataTable := csvdatatable.CreateNewCustomCsvBasicDataTable(allOriginalLines) + dataTable := csvdatatable.CreateNewCustomCsvBasicDataTable(allOriginalLines, true) return dataTable, nil } diff --git a/pkg/converters/csv/csv_file_basic_data_table.go b/pkg/converters/csv/csv_file_basic_data_table.go index 628333c2..d5bad227 100644 --- a/pkg/converters/csv/csv_file_basic_data_table.go +++ b/pkg/converters/csv/csv_file_basic_data_table.go @@ -13,7 +13,8 @@ import ( // CsvFileBasicDataTable defines the structure of csv data table type CsvFileBasicDataTable struct { - allLines [][]string + allLines [][]string + hasTitleLine bool } // CsvFileBasicDataTableRow defines the structure of csv data table row @@ -34,7 +35,11 @@ func (t *CsvFileBasicDataTable) DataRowCount() int { return 0 } - return len(t.allLines) - 1 + if t.hasTitleLine { + return len(t.allLines) - 1 + } else { + return len(t.allLines) + } } // HeaderColumnNames returns the header column name list @@ -43,14 +48,24 @@ func (t *CsvFileBasicDataTable) HeaderColumnNames() []string { return nil } - return t.allLines[0] + if t.hasTitleLine { + return t.allLines[0] + } else { + return nil + } } // DataRowIterator returns the iterator of data row func (t *CsvFileBasicDataTable) DataRowIterator() datatable.BasicDataTableRowIterator { + startIndex := -1 + + if t.hasTitleLine { + startIndex = 0 + } + return &CsvFileBasicDataTableRowIterator{ dataTable: t, - currentIndex: 0, + currentIndex: startIndex, } } @@ -95,18 +110,19 @@ func (t *CsvFileBasicDataTableRowIterator) Next() datatable.BasicDataTableRow { } // CreateNewCsvBasicDataTable returns comma separated values data table by io readers -func CreateNewCsvBasicDataTable(ctx core.Context, reader io.Reader) (datatable.BasicDataTable, error) { - return createNewCsvFileBasicDataTable(ctx, reader, ',') +func CreateNewCsvBasicDataTable(ctx core.Context, reader io.Reader, hasTitleLine bool) (datatable.BasicDataTable, error) { + return createNewCsvFileBasicDataTable(ctx, reader, ',', hasTitleLine) } // CreateNewCustomCsvBasicDataTable returns character separated values data table by io readers -func CreateNewCustomCsvBasicDataTable(allLines [][]string) datatable.BasicDataTable { +func CreateNewCustomCsvBasicDataTable(allLines [][]string, hasTitleLine bool) datatable.BasicDataTable { return &CsvFileBasicDataTable{ - allLines: allLines, + allLines: allLines, + hasTitleLine: hasTitleLine, } } -func createNewCsvFileBasicDataTable(ctx core.Context, reader io.Reader, separator rune) (*CsvFileBasicDataTable, error) { +func createNewCsvFileBasicDataTable(ctx core.Context, reader io.Reader, separator rune, hasTitleLine bool) (*CsvFileBasicDataTable, error) { csvReader := csv.NewReader(reader) csvReader.Comma = separator csvReader.FieldsPerRecord = -1 @@ -133,6 +149,7 @@ func createNewCsvFileBasicDataTable(ctx core.Context, reader io.Reader, separato } return &CsvFileBasicDataTable{ - allLines: allLines, + allLines: allLines, + hasTitleLine: hasTitleLine, }, nil } diff --git a/pkg/converters/csv/csv_file_basic_data_table_test.go b/pkg/converters/csv/csv_file_basic_data_table_test.go index 086d06ff..1090b3c2 100644 --- a/pkg/converters/csv/csv_file_basic_data_table_test.go +++ b/pkg/converters/csv/csv_file_basic_data_table_test.go @@ -14,7 +14,17 @@ func TestCsvFileBasicDataTableDataRowCount(t *testing.T) { {"A1", "B1", "C1"}, {"A2", "B2", "C2"}, {"A3", "B3", "C3"}, - }) + }, false) + + assert.Equal(t, 3, datatable.DataRowCount()) +} + +func TestCsvFileBasicDataTableDataRowCount_HasTitleLine(t *testing.T) { + datatable := CreateNewCustomCsvBasicDataTable([][]string{ + {"A1", "B1", "C1"}, + {"A2", "B2", "C2"}, + {"A3", "B3", "C3"}, + }, true) assert.Equal(t, 2, datatable.DataRowCount()) } @@ -22,14 +32,16 @@ func TestCsvFileBasicDataTableDataRowCount(t *testing.T) { func TestCsvFileBasicDataTableDataRowCount_OnlyHeaderLine(t *testing.T) { datatable := CreateNewCustomCsvBasicDataTable([][]string{ {"A1", "B1", "C1"}, - }) + }, true) assert.Equal(t, 0, datatable.DataRowCount()) } func TestCsvFileBasicDataTableDataRowCount_EmptyContent(t *testing.T) { - datatable := CreateNewCustomCsvBasicDataTable([][]string{}) + datatable := CreateNewCustomCsvBasicDataTable([][]string{}, false) + assert.Equal(t, 0, datatable.DataRowCount()) + datatable = CreateNewCustomCsvBasicDataTable([][]string{}, true) assert.Equal(t, 0, datatable.DataRowCount()) } @@ -38,14 +50,16 @@ func TestCsvFileBasicDataTableHeaderColumnNames(t *testing.T) { {"A1", "B1", "C1"}, {"A2", "B2", "C2"}, {"A3", "B3", "C3"}, - }) + }, true) assert.EqualValues(t, []string{"A1", "B1", "C1"}, datatable.HeaderColumnNames()) } func TestCsvFileBasicDataTableHeaderColumnNames_EmptyContent(t *testing.T) { - datatable := CreateNewCustomCsvBasicDataTable([][]string{}) + datatable := CreateNewCustomCsvBasicDataTable([][]string{}, false) + assert.Nil(t, datatable.HeaderColumnNames()) + datatable = CreateNewCustomCsvBasicDataTable([][]string{}, true) assert.Nil(t, datatable.HeaderColumnNames()) } @@ -54,7 +68,34 @@ func TestCsvFileBasicDataTableRowIterator(t *testing.T) { {"A1", "B1", "C1"}, {"A2", "B2", "C2"}, {"A3", "B3", "C3"}, - }) + }, false) + + iterator := datatable.DataRowIterator() + assert.True(t, iterator.HasNext()) + + // data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // data row 3 + assert.NotNil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row 4 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) +} + +func TestCsvFileBasicDataTableRowIterator_HasTitleLine(t *testing.T) { + datatable := CreateNewCustomCsvBasicDataTable([][]string{ + {"A1", "B1", "C1"}, + {"A2", "B2", "C2"}, + {"A3", "B3", "C3"}, + }, true) iterator := datatable.DataRowIterator() assert.True(t, iterator.HasNext()) @@ -81,7 +122,7 @@ func TestCsvFileBasicDataTableRowColumnCount(t *testing.T) { {"A1", "B1", "C1"}, {"A2", "B2", "C2"}, {"A3", "B3", "C3"}, - }) + }, true) iterator := datatable.DataRowIterator() @@ -97,7 +138,32 @@ func TestCsvFileBasicDataTableRowGetData(t *testing.T) { {"A1", "B1", "C1"}, {"A2", "B2", "C2"}, {"A3", "B3", "C3"}, - }) + }, false) + + iterator := datatable.DataRowIterator() + + row1 := iterator.Next() + assert.Equal(t, "A1", row1.GetData(0)) + assert.Equal(t, "B1", row1.GetData(1)) + assert.Equal(t, "C1", row1.GetData(2)) + + row2 := iterator.Next() + assert.Equal(t, "A2", row2.GetData(0)) + assert.Equal(t, "B2", row2.GetData(1)) + assert.Equal(t, "C2", row2.GetData(2)) + + row3 := iterator.Next() + assert.Equal(t, "A3", row3.GetData(0)) + assert.Equal(t, "B3", row3.GetData(1)) + assert.Equal(t, "C3", row3.GetData(2)) +} + +func TestCsvFileBasicDataTableRowGetData_HasTitleLine(t *testing.T) { + datatable := CreateNewCustomCsvBasicDataTable([][]string{ + {"A1", "B1", "C1"}, + {"A2", "B2", "C2"}, + {"A3", "B3", "C3"}, + }, true) iterator := datatable.DataRowIterator() @@ -117,7 +183,7 @@ func TestCsvFileBasicDataTableRowGetData_GetNotExistedColumnData(t *testing.T) { {"A1", "B1", "C1"}, {"A2", "B2", "C2"}, {"A3", "B3", "C3"}, - }) + }, true) iterator := datatable.DataRowIterator() @@ -130,7 +196,7 @@ func TestCreateNewCsvBasicDataTable(t *testing.T) { reader := bytes.NewReader([]byte("A1,B1,C1\n" + "A2,B2,C2\n" + "A3,B3,C3\n")) - datatable, err := CreateNewCsvBasicDataTable(context, reader) + datatable, err := CreateNewCsvBasicDataTable(context, reader, true) assert.Nil(t, err) assert.Equal(t, 2, datatable.DataRowCount()) @@ -160,7 +226,7 @@ func TestCreateNewCsvBasicDataTable_SkipBlankLine(t *testing.T) { "A2,B2,C2\n" + "\n" + "A3,B3,C3\n")) - datatable, err := CreateNewCsvBasicDataTable(context, reader) + datatable, err := CreateNewCsvBasicDataTable(context, reader, true) assert.Nil(t, err) assert.Equal(t, 2, datatable.DataRowCount()) diff --git a/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go b/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go index d7713d57..c5fd2657 100644 --- a/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go +++ b/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go @@ -153,11 +153,7 @@ func (c *customTransactionDataDsvFileImporter) ParseImportedData(ctx core.Contex return nil, nil, nil, nil, nil, nil, err } - if !c.hasHeaderLine { - allLines = append([][]string{{}}, allLines...) - } - - dataTable := csvconverter.CreateNewCustomCsvBasicDataTable(allLines) + dataTable := csvconverter.CreateNewCustomCsvBasicDataTable(allLines, c.hasHeaderLine) transactionDataTable := CreateNewCustomPlainTextDataTable(dataTable, c.columnIndexMapping, c.transactionTypeNameMapping, c.timeFormat, c.timezoneFormat, c.amountDecimalSeparator, c.amountDigitGroupingSymbol) dataTableImporter := converter.CreateNewImporterWithTypeNameMapping(customTransactionTypeNameMapping, c.geoLocationSeparator, c.geoLocationOrder, c.transactionTagSeparator) diff --git a/pkg/converters/excel/excel_mscfb_file_basic_data_table.go b/pkg/converters/excel/excel_mscfb_file_basic_data_table.go index 243f13c6..a2dc89b5 100644 --- a/pkg/converters/excel/excel_mscfb_file_basic_data_table.go +++ b/pkg/converters/excel/excel_mscfb_file_basic_data_table.go @@ -14,6 +14,7 @@ import ( type ExcelMSCFBFileBasicDataTable struct { workbook *xls.WorkBook headerLineColumnNames []string + hasTitleLine bool } // ExcelMSCFBFileBasicDataTableRow defines the structure of excel (microsoft compound file binary) file data table row @@ -26,7 +27,7 @@ type ExcelMSCFBFileBasicDataTableRow struct { type ExcelMSCFBFileBasicDataTableRowIterator struct { dataTable *ExcelMSCFBFileBasicDataTable currentSheetIndex int - currentRowIndexInSheet uint16 + currentRowIndexInSheet int } // DataRowCount returns the total count of data row @@ -36,11 +37,23 @@ func (t *ExcelMSCFBFileBasicDataTable) DataRowCount() int { for i := 0; i < t.workbook.NumSheets(); i++ { sheet := t.workbook.GetSheet(i) - if sheet.MaxRow < 1 { + if sheet == nil { continue } - totalDataRowCount += int(sheet.MaxRow) + if t.hasTitleLine { + if sheet.MaxRow < 1 { + continue + } + + totalDataRowCount += int(sheet.MaxRow) + } else { + if sheet.MaxRow <= 0 && sheet.Row(0) == nil { + continue + } + + totalDataRowCount += int(sheet.MaxRow) + 1 + } } return totalDataRowCount @@ -48,15 +61,25 @@ func (t *ExcelMSCFBFileBasicDataTable) DataRowCount() int { // HeaderColumnNames returns the header column name list func (t *ExcelMSCFBFileBasicDataTable) HeaderColumnNames() []string { + if !t.hasTitleLine { + return nil + } + return t.headerLineColumnNames } // DataRowIterator returns the iterator of data row func (t *ExcelMSCFBFileBasicDataTable) DataRowIterator() datatable.BasicDataTableRowIterator { + startIndex := -1 + + if t.hasTitleLine { + startIndex = 0 + } + return &ExcelMSCFBFileBasicDataTableRowIterator{ dataTable: t, currentSheetIndex: 0, - currentRowIndexInSheet: 0, + currentRowIndexInSheet: startIndex, } } @@ -82,15 +105,21 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) HasNext() bool { currentSheet := workbook.GetSheet(t.currentSheetIndex) - if t.currentRowIndexInSheet+1 <= currentSheet.MaxRow { + if t.currentRowIndexInSheet+1 <= int(currentSheet.MaxRow) && currentSheet.Row(t.currentRowIndexInSheet+1) != nil { return true } for i := t.currentSheetIndex + 1; i < workbook.NumSheets(); i++ { sheet := workbook.GetSheet(i) - if sheet.MaxRow < 1 { - continue + if t.dataTable.hasTitleLine { + if sheet.MaxRow < 1 { + continue + } + } else { + if sheet.MaxRow <= 0 && sheet.Row(0) == nil { + continue + } } return true @@ -107,20 +136,22 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) CurrentRowId() string { // Next returns the next basic data row func (t *ExcelMSCFBFileBasicDataTableRowIterator) Next() datatable.BasicDataTableRow { workbook := t.dataTable.workbook - currentRowIndexInTable := t.currentRowIndexInSheet for i := t.currentSheetIndex; i < workbook.NumSheets(); i++ { sheet := workbook.GetSheet(i) - if currentRowIndexInTable+1 <= sheet.MaxRow { + if t.currentRowIndexInSheet+1 <= int(sheet.MaxRow) && sheet.Row(t.currentRowIndexInSheet+1) != nil { t.currentRowIndexInSheet++ - currentRowIndexInTable = t.currentRowIndexInSheet break } t.currentSheetIndex++ - t.currentRowIndexInSheet = 0 - currentRowIndexInTable = 0 + + if t.dataTable.hasTitleLine { + t.currentRowIndexInSheet = 0 + } else { + t.currentRowIndexInSheet = -1 + } } if t.currentSheetIndex >= workbook.NumSheets() { @@ -129,7 +160,7 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) Next() datatable.BasicDataTabl currentSheet := workbook.GetSheet(t.currentSheetIndex) - if t.currentRowIndexInSheet > currentSheet.MaxRow { + if t.currentRowIndexInSheet > int(currentSheet.MaxRow) || currentSheet.Row(t.currentRowIndexInSheet) == nil { return nil } @@ -140,7 +171,7 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) Next() datatable.BasicDataTabl } // CreateNewExcelMSCFBFileBasicDataTable returns excel (microsoft compound file binary) data table by file binary data -func CreateNewExcelMSCFBFileBasicDataTable(data []byte) (datatable.BasicDataTable, error) { +func CreateNewExcelMSCFBFileBasicDataTable(data []byte, hasTitleLine bool) (datatable.BasicDataTable, error) { reader := bytes.NewReader(data) workbook, err := xls.OpenReader(reader, "") @@ -148,12 +179,12 @@ func CreateNewExcelMSCFBFileBasicDataTable(data []byte) (datatable.BasicDataTabl return nil, err } - var headerRowItems []string + var firstRowItems []string for i := 0; i < workbook.NumSheets(); i++ { sheet := workbook.GetSheet(i) - if sheet.MaxRow < 0 { + if sheet.MaxRow <= 0 && sheet.Row(0) == nil { continue } @@ -171,21 +202,28 @@ func CreateNewExcelMSCFBFileBasicDataTable(data []byte) (datatable.BasicDataTabl break } - headerRowItems = append(headerRowItems, headerItem) + firstRowItems = append(firstRowItems, headerItem) } } else { - for j := 0; j <= min(row.LastCol(), len(headerRowItems)-1); j++ { + for j := 0; j <= min(row.LastCol(), len(firstRowItems)-1); j++ { headerItem := row.Col(j) - if headerItem != headerRowItems[j] { + if headerItem != firstRowItems[j] { return nil, errs.ErrFieldsInMultiTableAreDifferent } } } } + var headerLineColumnNames []string = nil + + if hasTitleLine { + headerLineColumnNames = firstRowItems + } + return &ExcelMSCFBFileBasicDataTable{ workbook: workbook, - headerLineColumnNames: headerRowItems, + headerLineColumnNames: headerLineColumnNames, + hasTitleLine: hasTitleLine, }, nil } diff --git a/pkg/converters/excel/excel_mscfb_file_basic_data_table_test.go b/pkg/converters/excel/excel_mscfb_file_basic_data_table_test.go index aa4a82eb..742427aa 100644 --- a/pkg/converters/excel/excel_mscfb_file_basic_data_table_test.go +++ b/pkg/converters/excel/excel_mscfb_file_basic_data_table_test.go @@ -13,7 +13,16 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Equal(t, 3, datatable.DataRowCount()) +} + +func TestExcelMSCFBFileBasicDataTableDataRowCount_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 2, datatable.DataRowCount()) } @@ -22,7 +31,16 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount_MultipleSheets(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Equal(t, 9, datatable.DataRowCount()) +} + +func TestExcelMSCFBFileBasicDataTableDataRowCount_MultipleSheets_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 5, datatable.DataRowCount()) } @@ -31,7 +49,7 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount_OnlyHeaderLine(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 0, datatable.DataRowCount()) } @@ -40,7 +58,11 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount_EmptyContent(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Equal(t, 0, datatable.DataRowCount()) + + datatable, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 0, datatable.DataRowCount()) } @@ -49,7 +71,17 @@ func TestExcelMSCFBFileBasicDataTableHeaderColumnNames(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Nil(t, datatable.HeaderColumnNames()) +} + +func TestExcelMSCFBFileBasicDataTableHeaderColumnNames_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) assert.EqualValues(t, []string{"A1", "B1", "C1"}, datatable.HeaderColumnNames()) } @@ -57,15 +89,47 @@ func TestExcelMSCFBFileBasicDataTableHeaderColumnNames_EmptyContent(t *testing.T testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Nil(t, datatable.HeaderColumnNames()) + + datatable, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) assert.Nil(t, datatable.HeaderColumnNames()) } -func TestExcelMSCFBFileBasicDataTableRowIterator(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowIterator(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + assert.True(t, iterator.HasNext()) + + // data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // data row 3 + assert.NotNil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row 4 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) +} + +func TestExcelMSCFBFileBasicDataRowIterator_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.True(t, iterator.HasNext()) @@ -86,11 +150,66 @@ func TestExcelMSCFBFileBasicDataTableRowIterator(t *testing.T) { assert.False(t, iterator.HasNext()) } -func TestExcelMSCFBFileBasicDataTableRowIterator_MultipleSheets(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowIterator_MultipleSheets(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + assert.True(t, iterator.HasNext()) + + // sheet 1 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 1 data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 1 data row 3 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 3 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 3 data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 4 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 5 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 5 data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 5 data row 3 + assert.NotNil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) +} + +func TestExcelMSCFBFileBasicDataRowIterator_MultipleSheets_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.True(t, iterator.HasNext()) @@ -123,11 +242,12 @@ func TestExcelMSCFBFileBasicDataTableRowIterator_MultipleSheets(t *testing.T) { assert.False(t, iterator.HasNext()) } -func TestExcelMSCFBFileBasicDataTableRowIterator_OnlyHeaderLine(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowIterator_OnlyHeaderLine(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.False(t, iterator.HasNext()) @@ -140,11 +260,12 @@ func TestExcelMSCFBFileBasicDataTableRowIterator_OnlyHeaderLine(t *testing.T) { assert.False(t, iterator.HasNext()) } -func TestExcelMSCFBFileBasicDataTableRowIterator_EmptyContent(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowIterator_EmptyContent(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.False(t, iterator.HasNext()) @@ -155,13 +276,27 @@ func TestExcelMSCFBFileBasicDataTableRowIterator_EmptyContent(t *testing.T) { // not existed data row 2 assert.Nil(t, iterator.Next()) assert.False(t, iterator.HasNext()) + + datatable, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) + iterator = datatable.DataRowIterator() + assert.False(t, iterator.HasNext()) + + // not existed data row 1 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row 2 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) } -func TestExcelMSCFBFileBasicDataTableRowColumnCount(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowColumnCount(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) iterator := datatable.DataRowIterator() row1 := iterator.Next() @@ -171,11 +306,36 @@ func TestExcelMSCFBFileBasicDataTableRowColumnCount(t *testing.T) { assert.EqualValues(t, 4, row2.ColumnCount()) } -func TestExcelMSCFBFileBasicDataTableRowGetData(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowGetData(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + + row1 := iterator.Next() + assert.Equal(t, "A1", row1.GetData(0)) + assert.Equal(t, "B1", row1.GetData(1)) + assert.Equal(t, "C1", row1.GetData(2)) + + row2 := iterator.Next() + assert.Equal(t, "A2", row2.GetData(0)) + assert.Equal(t, "B2", row2.GetData(1)) + assert.Equal(t, "C2", row2.GetData(2)) + + row3 := iterator.Next() + assert.Equal(t, "A3", row3.GetData(0)) + assert.Equal(t, "B3", row3.GetData(1)) + assert.Equal(t, "C3", row3.GetData(2)) +} + +func TestExcelMSCFBFileBasicDataRowGetData_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() row1 := iterator.Next() @@ -189,22 +349,80 @@ func TestExcelMSCFBFileBasicDataTableRowGetData(t *testing.T) { assert.Equal(t, "C3", row2.GetData(2)) } -func TestExcelMSCFBFileBasicDataTableRowGetData_GetNotExistedColumnData(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowGetData_GetNotExistedColumnData(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) iterator := datatable.DataRowIterator() row1 := iterator.Next() assert.Equal(t, "", row1.GetData(3)) } -func TestExcelMSCFBFileBasicDataTableRowGetData_MultipleSheets(t *testing.T) { +func TestExcelMSCFBFileBasicDataRowGetData_MultipleSheets(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls") assert.Nil(t, err) - datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata) + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + + sheet1Row1 := iterator.Next() + assert.Equal(t, "A1", sheet1Row1.GetData(0)) + assert.Equal(t, "B1", sheet1Row1.GetData(1)) + assert.Equal(t, "C1", sheet1Row1.GetData(2)) + + sheet1Row2 := iterator.Next() + assert.Equal(t, "1-A2", sheet1Row2.GetData(0)) + assert.Equal(t, "1-B2", sheet1Row2.GetData(1)) + assert.Equal(t, "1-C2", sheet1Row2.GetData(2)) + + sheet1Row3 := iterator.Next() + assert.Equal(t, "1-A3", sheet1Row3.GetData(0)) + assert.Equal(t, "1-B3", sheet1Row3.GetData(1)) + assert.Equal(t, "1-C3", sheet1Row3.GetData(2)) + + // skip empty sheet2 + + sheet3Row1 := iterator.Next() + assert.Equal(t, "A1", sheet3Row1.GetData(0)) + assert.Equal(t, "B1", sheet3Row1.GetData(1)) + assert.Equal(t, "C1", sheet3Row1.GetData(2)) + + sheet3Row2 := iterator.Next() + assert.Equal(t, "3-A2", sheet3Row2.GetData(0)) + assert.Equal(t, "3-B2", sheet3Row2.GetData(1)) + assert.Equal(t, "", sheet3Row2.GetData(2)) + + sheet4Row1 := iterator.Next() + assert.Equal(t, "A1", sheet4Row1.GetData(0)) + assert.Equal(t, "B1", sheet4Row1.GetData(1)) + assert.Equal(t, "C1", sheet4Row1.GetData(2)) + + sheet5Row1 := iterator.Next() + assert.Equal(t, "A1", sheet5Row1.GetData(0)) + assert.Equal(t, "B1", sheet5Row1.GetData(1)) + assert.Equal(t, "C1", sheet5Row1.GetData(2)) + + sheet5Row2 := iterator.Next() + assert.Equal(t, "5-A2", sheet5Row2.GetData(0)) + assert.Equal(t, "5-B2", sheet5Row2.GetData(1)) + assert.Equal(t, "5-C2", sheet5Row2.GetData(2)) + + sheet5Row3 := iterator.Next() + assert.Equal(t, "5-A3", sheet5Row3.GetData(0)) + assert.Equal(t, "5-B3", sheet5Row3.GetData(1)) + assert.Equal(t, "5-C3", sheet5Row3.GetData(2)) +} + +func TestExcelMSCFBFileBasicDataRowGetData_MultipleSheets_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls") + assert.Nil(t, err) + + datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() sheet1Row1 := iterator.Next() @@ -241,6 +459,6 @@ func TestCreateNewExcelMSCFBFileBasicDataTable_MultipleSheetsWithDifferentHeader testdata, err := os.ReadFile("../../../testdata/multiple_sheets_with_different_header_row_excel_file.xls") assert.Nil(t, err) - _, err = CreateNewExcelMSCFBFileBasicDataTable(testdata) + _, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true) assert.EqualError(t, err, errs.ErrFieldsInMultiTableAreDifferent.Message) } diff --git a/pkg/converters/excel/excel_ooxml_file_basic_data_table.go b/pkg/converters/excel/excel_ooxml_file_basic_data_table.go index c3b4e62f..0b006af2 100644 --- a/pkg/converters/excel/excel_ooxml_file_basic_data_table.go +++ b/pkg/converters/excel/excel_ooxml_file_basic_data_table.go @@ -20,6 +20,7 @@ type excelOOXMLSheet struct { type ExcelOOXMLFileBasicDataTable struct { sheets []*excelOOXMLSheet headerLineColumnNames []string + hasTitleLine bool } // ExcelOOXMLFileBasicDataTableRow defines the structure of excel (Office Open XML) file data table row @@ -47,7 +48,11 @@ func (t *ExcelOOXMLFileBasicDataTable) DataRowCount() int { continue } - totalDataRowCount += len(sheet.allData) - 1 + if t.hasTitleLine { + totalDataRowCount += len(sheet.allData) - 1 + } else { + totalDataRowCount += len(sheet.allData) + } } return totalDataRowCount @@ -55,15 +60,25 @@ func (t *ExcelOOXMLFileBasicDataTable) DataRowCount() int { // HeaderColumnNames returns the header column name list func (t *ExcelOOXMLFileBasicDataTable) HeaderColumnNames() []string { + if !t.hasTitleLine { + return nil + } + return t.headerLineColumnNames } // DataRowIterator returns the iterator of data row func (t *ExcelOOXMLFileBasicDataTable) DataRowIterator() datatable.BasicDataTableRowIterator { + startIndex := -1 + + if t.hasTitleLine { + startIndex = 0 + } + return &ExcelOOXMLFileBasicDataTableRowIterator{ dataTable: t, currentSheetIndex: 0, - currentRowIndexInSheet: 0, + currentRowIndexInSheet: startIndex, } } @@ -98,8 +113,14 @@ func (t *ExcelOOXMLFileBasicDataTableRowIterator) HasNext() bool { for i := t.currentSheetIndex + 1; i < len(sheets); i++ { sheet := sheets[i] - if len(sheet.allData) <= 1 { - continue + if t.dataTable.hasTitleLine { + if len(sheet.allData) <= 1 { + continue + } + } else { + if len(sheet.allData) <= 0 { + continue + } } return true @@ -116,20 +137,22 @@ func (t *ExcelOOXMLFileBasicDataTableRowIterator) CurrentRowId() string { // Next returns the next basic data row func (t *ExcelOOXMLFileBasicDataTableRowIterator) Next() datatable.BasicDataTableRow { sheets := t.dataTable.sheets - currentRowIndexInTable := t.currentRowIndexInSheet for i := t.currentSheetIndex; i < len(sheets); i++ { sheet := sheets[i] - if currentRowIndexInTable+1 < len(sheet.allData) { + if t.currentRowIndexInSheet+1 < len(sheet.allData) { t.currentRowIndexInSheet++ - currentRowIndexInTable = t.currentRowIndexInSheet break } t.currentSheetIndex++ - t.currentRowIndexInSheet = 0 - currentRowIndexInTable = 0 + + if t.dataTable.hasTitleLine { + t.currentRowIndexInSheet = 0 + } else { + t.currentRowIndexInSheet = -1 + } } if t.currentSheetIndex >= len(sheets) { @@ -150,7 +173,7 @@ func (t *ExcelOOXMLFileBasicDataTableRowIterator) Next() datatable.BasicDataTabl } // CreateNewExcelOOXMLFileBasicDataTable returns excel (Office Open XML) data table by file binary data -func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTable, error) { +func CreateNewExcelOOXMLFileBasicDataTable(data []byte, hasTitleLine bool) (datatable.BasicDataTable, error) { reader := bytes.NewReader(data) file, err := excelize.OpenReader(reader) @@ -161,7 +184,7 @@ func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTabl } sheetNames := file.GetSheetList() - var headerRowItems []string + var firstRowItems []string var sheets []*excelOOXMLSheet for i := 0; i < len(sheetNames); i++ { @@ -186,13 +209,13 @@ func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTabl break } - headerRowItems = append(headerRowItems, headerItem) + firstRowItems = append(firstRowItems, headerItem) } } else { - for j := 0; j < min(len(row), len(headerRowItems)); j++ { + for j := 0; j < min(len(row), len(firstRowItems)); j++ { headerItem := row[j] - if headerItem != headerRowItems[j] { + if headerItem != firstRowItems[j] { return nil, errs.ErrFieldsInMultiTableAreDifferent } } @@ -204,8 +227,15 @@ func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTabl }) } + var headerLineColumnNames []string = nil + + if hasTitleLine { + headerLineColumnNames = firstRowItems + } + return &ExcelOOXMLFileBasicDataTable{ sheets: sheets, - headerLineColumnNames: headerRowItems, + headerLineColumnNames: headerLineColumnNames, + hasTitleLine: hasTitleLine, }, nil } diff --git a/pkg/converters/excel/excel_ooxml_file_basic_data_table_test.go b/pkg/converters/excel/excel_ooxml_file_basic_data_table_test.go index 08cb0b83..c41f4942 100644 --- a/pkg/converters/excel/excel_ooxml_file_basic_data_table_test.go +++ b/pkg/converters/excel/excel_ooxml_file_basic_data_table_test.go @@ -13,7 +13,16 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Equal(t, 3, datatable.DataRowCount()) +} + +func TestExcelOOXMLFileBasicDataTableDataRowCount_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 2, datatable.DataRowCount()) } @@ -22,7 +31,16 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount_MultipleSheets(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Equal(t, 9, datatable.DataRowCount()) +} + +func TestExcelOOXMLFileBasicDataTableDataRowCount_MultipleSheets_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 5, datatable.DataRowCount()) } @@ -31,7 +49,7 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount_OnlyHeaderLine(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 0, datatable.DataRowCount()) } @@ -40,7 +58,11 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount_EmptyContent(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Equal(t, 0, datatable.DataRowCount()) + + datatable, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true) assert.Nil(t, err) assert.Equal(t, 0, datatable.DataRowCount()) } @@ -49,7 +71,17 @@ func TestExcelOOXMLFileBasicDataTableHeaderColumnNames(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Nil(t, datatable.HeaderColumnNames()) +} + +func TestExcelOOXMLFileBasicDataTableHeaderColumnNames_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) assert.EqualValues(t, []string{"A1", "B1", "C1"}, datatable.HeaderColumnNames()) } @@ -57,7 +89,12 @@ func TestExcelOOXMLFileBasicDataTableHeaderColumnNames_EmptyContent(t *testing.T testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + assert.Nil(t, datatable.HeaderColumnNames()) + + datatable, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) assert.Nil(t, datatable.HeaderColumnNames()) } @@ -65,7 +102,34 @@ func TestExcelOOXMLFileBasicDataRowIterator(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + assert.True(t, iterator.HasNext()) + + // data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // data row 3 + assert.NotNil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row 4 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) +} + +func TestExcelOOXMLFileBasicDataRowIterator_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.True(t, iterator.HasNext()) @@ -90,7 +154,62 @@ func TestExcelOOXMLFileBasicDataRowIterator_MultipleSheets(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + assert.True(t, iterator.HasNext()) + + // sheet 1 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 1 data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 1 data row 3 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 3 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 3 data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 4 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 5 data row 1 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 5 data row 2 + assert.NotNil(t, iterator.Next()) + assert.True(t, iterator.HasNext()) + + // sheet 5 data row 3 + assert.NotNil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) +} + +func TestExcelOOXMLFileBasicDataRowIterator_MultipleSheets_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.True(t, iterator.HasNext()) @@ -127,7 +246,8 @@ func TestExcelOOXMLFileBasicDataRowIterator_OnlyHeaderLine(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.False(t, iterator.HasNext()) @@ -144,7 +264,8 @@ func TestExcelOOXMLFileBasicDataRowIterator_EmptyContent(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) iterator := datatable.DataRowIterator() assert.False(t, iterator.HasNext()) @@ -155,13 +276,27 @@ func TestExcelOOXMLFileBasicDataRowIterator_EmptyContent(t *testing.T) { // not existed data row 2 assert.Nil(t, iterator.Next()) assert.False(t, iterator.HasNext()) + + datatable, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) + iterator = datatable.DataRowIterator() + assert.False(t, iterator.HasNext()) + + // not existed data row 1 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) + + // not existed data row 2 + assert.Nil(t, iterator.Next()) + assert.False(t, iterator.HasNext()) } func TestExcelOOXMLFileBasicDataRowColumnCount(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) iterator := datatable.DataRowIterator() row1 := iterator.Next() @@ -175,7 +310,32 @@ func TestExcelOOXMLFileBasicDataRowGetData(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + + row1 := iterator.Next() + assert.Equal(t, "A1", row1.GetData(0)) + assert.Equal(t, "B1", row1.GetData(1)) + assert.Equal(t, "C1", row1.GetData(2)) + + row2 := iterator.Next() + assert.Equal(t, "A2", row2.GetData(0)) + assert.Equal(t, "B2", row2.GetData(1)) + assert.Equal(t, "C2", row2.GetData(2)) + + row3 := iterator.Next() + assert.Equal(t, "A3", row3.GetData(0)) + assert.Equal(t, "B3", row3.GetData(1)) + assert.Equal(t, "C3", row3.GetData(2)) +} + +func TestExcelOOXMLFileBasicDataRowGetData_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() row1 := iterator.Next() @@ -193,7 +353,8 @@ func TestExcelOOXMLFileBasicDataRowGetData_GetNotExistedColumnData(t *testing.T) testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) iterator := datatable.DataRowIterator() row1 := iterator.Next() @@ -204,7 +365,64 @@ func TestExcelOOXMLFileBasicDataRowGetData_MultipleSheets(t *testing.T) { testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx") assert.Nil(t, err) - datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata) + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false) + assert.Nil(t, err) + iterator := datatable.DataRowIterator() + + sheet1Row1 := iterator.Next() + assert.Equal(t, "A1", sheet1Row1.GetData(0)) + assert.Equal(t, "B1", sheet1Row1.GetData(1)) + assert.Equal(t, "C1", sheet1Row1.GetData(2)) + + sheet1Row2 := iterator.Next() + assert.Equal(t, "1-A2", sheet1Row2.GetData(0)) + assert.Equal(t, "1-B2", sheet1Row2.GetData(1)) + assert.Equal(t, "1-C2", sheet1Row2.GetData(2)) + + sheet1Row3 := iterator.Next() + assert.Equal(t, "1-A3", sheet1Row3.GetData(0)) + assert.Equal(t, "1-B3", sheet1Row3.GetData(1)) + assert.Equal(t, "1-C3", sheet1Row3.GetData(2)) + + // skip empty sheet2 + + sheet3Row1 := iterator.Next() + assert.Equal(t, "A1", sheet3Row1.GetData(0)) + assert.Equal(t, "B1", sheet3Row1.GetData(1)) + assert.Equal(t, "C1", sheet3Row1.GetData(2)) + + sheet3Row2 := iterator.Next() + assert.Equal(t, "3-A2", sheet3Row2.GetData(0)) + assert.Equal(t, "3-B2", sheet3Row2.GetData(1)) + assert.Equal(t, "", sheet3Row2.GetData(2)) + + sheet4Row1 := iterator.Next() + assert.Equal(t, "A1", sheet4Row1.GetData(0)) + assert.Equal(t, "B1", sheet4Row1.GetData(1)) + assert.Equal(t, "C1", sheet4Row1.GetData(2)) + + sheet5Row1 := iterator.Next() + assert.Equal(t, "A1", sheet5Row1.GetData(0)) + assert.Equal(t, "B1", sheet5Row1.GetData(1)) + assert.Equal(t, "C1", sheet5Row1.GetData(2)) + + sheet5Row2 := iterator.Next() + assert.Equal(t, "5-A2", sheet5Row2.GetData(0)) + assert.Equal(t, "5-B2", sheet5Row2.GetData(1)) + assert.Equal(t, "5-C2", sheet5Row2.GetData(2)) + + sheet5Row3 := iterator.Next() + assert.Equal(t, "5-A3", sheet5Row3.GetData(0)) + assert.Equal(t, "5-B3", sheet5Row3.GetData(1)) + assert.Equal(t, "5-C3", sheet5Row3.GetData(2)) +} + +func TestExcelOOXMLFileBasicDataRowGetData_MultipleSheets_HasTitleLine(t *testing.T) { + testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx") + assert.Nil(t, err) + + datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true) + assert.Nil(t, err) iterator := datatable.DataRowIterator() sheet1Row1 := iterator.Next() @@ -241,6 +459,6 @@ func TestCreateNewExcelOOXMLFileBasicDataTable_MultipleSheetsWithDifferentHeader testdata, err := os.ReadFile("../../../testdata/multiple_sheets_with_different_header_row_excel_file.xlsx") assert.Nil(t, err) - _, err = CreateNewExcelOOXMLFileBasicDataTable(testdata) + _, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true) assert.EqualError(t, err, errs.ErrFieldsInMultiTableAreDifferent.Message) } diff --git a/pkg/converters/feidee/feidee_mymoney_app_transaction_data_csv_file_importer.go b/pkg/converters/feidee/feidee_mymoney_app_transaction_data_csv_file_importer.go index 01481c6a..7cb93248 100644 --- a/pkg/converters/feidee/feidee_mymoney_app_transaction_data_csv_file_importer.go +++ b/pkg/converters/feidee/feidee_mymoney_app_transaction_data_csv_file_importer.go @@ -132,7 +132,7 @@ func (c *feideeMymoneyAppTransactionDataCsvFileImporter) createNewFeideeMymoneyA return nil, errs.ErrNotFoundTransactionDataInFile } - dataTable := csvdatatable.CreateNewCustomCsvBasicDataTable(allOriginalLines) + dataTable := csvdatatable.CreateNewCustomCsvBasicDataTable(allOriginalLines, true) return dataTable, nil } diff --git a/pkg/converters/feidee/feidee_mymoney_elecloud_transaction_data_xlsx_file_importer.go b/pkg/converters/feidee/feidee_mymoney_elecloud_transaction_data_xlsx_file_importer.go index 50e990c7..b1fe391d 100644 --- a/pkg/converters/feidee/feidee_mymoney_elecloud_transaction_data_xlsx_file_importer.go +++ b/pkg/converters/feidee/feidee_mymoney_elecloud_transaction_data_xlsx_file_importer.go @@ -32,7 +32,7 @@ var ( // ParseImportedData returns the imported data by parsing the feidee mymoney (elecloud) transaction xlsx data func (c *feideeMymoneyElecloudTransactionDataXlsxFileImporter) ParseImportedData(ctx core.Context, user *models.User, data []byte, defaultTimezoneOffset int16, accountMap map[string]*models.Account, expenseCategoryMap map[string]map[string]*models.TransactionCategory, incomeCategoryMap map[string]map[string]*models.TransactionCategory, transferCategoryMap map[string]map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (models.ImportedTransactionSlice, []*models.Account, []*models.TransactionCategory, []*models.TransactionCategory, []*models.TransactionCategory, []*models.TransactionTag, error) { - dataTable, err := excel.CreateNewExcelOOXMLFileBasicDataTable(data) + dataTable, err := excel.CreateNewExcelOOXMLFileBasicDataTable(data, true) if err != nil { return nil, nil, nil, nil, nil, nil, err diff --git a/pkg/converters/feidee/feidee_mymoney_web_transaction_data_xls_file_importer.go b/pkg/converters/feidee/feidee_mymoney_web_transaction_data_xls_file_importer.go index feb993b3..c6e8f3e5 100644 --- a/pkg/converters/feidee/feidee_mymoney_web_transaction_data_xls_file_importer.go +++ b/pkg/converters/feidee/feidee_mymoney_web_transaction_data_xls_file_importer.go @@ -31,7 +31,7 @@ var ( // ParseImportedData returns the imported data by parsing the feidee mymoney (web) transaction xls data func (c *feideeMymoneyWebTransactionDataXlsFileImporter) ParseImportedData(ctx core.Context, user *models.User, data []byte, defaultTimezoneOffset int16, accountMap map[string]*models.Account, expenseCategoryMap map[string]map[string]*models.TransactionCategory, incomeCategoryMap map[string]map[string]*models.TransactionCategory, transferCategoryMap map[string]map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (models.ImportedTransactionSlice, []*models.Account, []*models.TransactionCategory, []*models.TransactionCategory, []*models.TransactionCategory, []*models.TransactionTag, error) { - dataTable, err := excel.CreateNewExcelMSCFBFileBasicDataTable(data) + dataTable, err := excel.CreateNewExcelMSCFBFileBasicDataTable(data, true) if err != nil { return nil, nil, nil, nil, nil, nil, err diff --git a/pkg/converters/fireflyIII/fireflyiii_transaction_data_csv_file_importer.go b/pkg/converters/fireflyIII/fireflyiii_transaction_data_csv_file_importer.go index 1bd7db51..5860c443 100644 --- a/pkg/converters/fireflyIII/fireflyiii_transaction_data_csv_file_importer.go +++ b/pkg/converters/fireflyIII/fireflyiii_transaction_data_csv_file_importer.go @@ -42,7 +42,7 @@ var ( // ParseImportedData returns the imported data by parsing the firefly III transaction csv data func (c *fireflyIIITransactionDataCsvFileImporter) ParseImportedData(ctx core.Context, user *models.User, data []byte, defaultTimezoneOffset int16, accountMap map[string]*models.Account, expenseCategoryMap map[string]map[string]*models.TransactionCategory, incomeCategoryMap map[string]map[string]*models.TransactionCategory, transferCategoryMap map[string]map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (models.ImportedTransactionSlice, []*models.Account, []*models.TransactionCategory, []*models.TransactionCategory, []*models.TransactionCategory, []*models.TransactionTag, error) { reader := bytes.NewReader(data) - dataTable, err := csv.CreateNewCsvBasicDataTable(ctx, reader) + dataTable, err := csv.CreateNewCsvBasicDataTable(ctx, reader, true) if err != nil { return nil, nil, nil, nil, nil, nil, err diff --git a/pkg/converters/wechat/wechat_pay_transaction_data_csv_file_importer.go b/pkg/converters/wechat/wechat_pay_transaction_data_csv_file_importer.go index 7f6b2afc..d9357f17 100644 --- a/pkg/converters/wechat/wechat_pay_transaction_data_csv_file_importer.go +++ b/pkg/converters/wechat/wechat_pay_transaction_data_csv_file_importer.go @@ -143,7 +143,7 @@ func (c *wechatPayTransactionDataCsvFileImporter) createNewWeChatPayBasicDataTab return nil, errs.ErrNotFoundTransactionDataInFile } - dataTable := csvdatatable.CreateNewCustomCsvBasicDataTable(allOriginalLines) + dataTable := csvdatatable.CreateNewCustomCsvBasicDataTable(allOriginalLines, true) return dataTable, nil }