support setting whether the data table in csv / xls / xlsx files contains a header row

This commit is contained in:
MaysWind
2025-08-08 20:11:31 +08:00
parent 351cebe169
commit ecf6fbd187
13 changed files with 688 additions and 105 deletions
@@ -14,6 +14,7 @@ import (
type ExcelMSCFBFileBasicDataTable struct {
workbook *xls.WorkBook
headerLineColumnNames []string
hasTitleLine bool
}
// ExcelMSCFBFileBasicDataTableRow defines the structure of excel (microsoft compound file binary) file data table row
@@ -26,7 +27,7 @@ type ExcelMSCFBFileBasicDataTableRow struct {
type ExcelMSCFBFileBasicDataTableRowIterator struct {
dataTable *ExcelMSCFBFileBasicDataTable
currentSheetIndex int
currentRowIndexInSheet uint16
currentRowIndexInSheet int
}
// DataRowCount returns the total count of data row
@@ -36,11 +37,23 @@ func (t *ExcelMSCFBFileBasicDataTable) DataRowCount() int {
for i := 0; i < t.workbook.NumSheets(); i++ {
sheet := t.workbook.GetSheet(i)
if sheet.MaxRow < 1 {
if sheet == nil {
continue
}
totalDataRowCount += int(sheet.MaxRow)
if t.hasTitleLine {
if sheet.MaxRow < 1 {
continue
}
totalDataRowCount += int(sheet.MaxRow)
} else {
if sheet.MaxRow <= 0 && sheet.Row(0) == nil {
continue
}
totalDataRowCount += int(sheet.MaxRow) + 1
}
}
return totalDataRowCount
@@ -48,15 +61,25 @@ func (t *ExcelMSCFBFileBasicDataTable) DataRowCount() int {
// HeaderColumnNames returns the header column name list
func (t *ExcelMSCFBFileBasicDataTable) HeaderColumnNames() []string {
if !t.hasTitleLine {
return nil
}
return t.headerLineColumnNames
}
// DataRowIterator returns the iterator of data row
func (t *ExcelMSCFBFileBasicDataTable) DataRowIterator() datatable.BasicDataTableRowIterator {
startIndex := -1
if t.hasTitleLine {
startIndex = 0
}
return &ExcelMSCFBFileBasicDataTableRowIterator{
dataTable: t,
currentSheetIndex: 0,
currentRowIndexInSheet: 0,
currentRowIndexInSheet: startIndex,
}
}
@@ -82,15 +105,21 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) HasNext() bool {
currentSheet := workbook.GetSheet(t.currentSheetIndex)
if t.currentRowIndexInSheet+1 <= currentSheet.MaxRow {
if t.currentRowIndexInSheet+1 <= int(currentSheet.MaxRow) && currentSheet.Row(t.currentRowIndexInSheet+1) != nil {
return true
}
for i := t.currentSheetIndex + 1; i < workbook.NumSheets(); i++ {
sheet := workbook.GetSheet(i)
if sheet.MaxRow < 1 {
continue
if t.dataTable.hasTitleLine {
if sheet.MaxRow < 1 {
continue
}
} else {
if sheet.MaxRow <= 0 && sheet.Row(0) == nil {
continue
}
}
return true
@@ -107,20 +136,22 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) CurrentRowId() string {
// Next returns the next basic data row
func (t *ExcelMSCFBFileBasicDataTableRowIterator) Next() datatable.BasicDataTableRow {
workbook := t.dataTable.workbook
currentRowIndexInTable := t.currentRowIndexInSheet
for i := t.currentSheetIndex; i < workbook.NumSheets(); i++ {
sheet := workbook.GetSheet(i)
if currentRowIndexInTable+1 <= sheet.MaxRow {
if t.currentRowIndexInSheet+1 <= int(sheet.MaxRow) && sheet.Row(t.currentRowIndexInSheet+1) != nil {
t.currentRowIndexInSheet++
currentRowIndexInTable = t.currentRowIndexInSheet
break
}
t.currentSheetIndex++
t.currentRowIndexInSheet = 0
currentRowIndexInTable = 0
if t.dataTable.hasTitleLine {
t.currentRowIndexInSheet = 0
} else {
t.currentRowIndexInSheet = -1
}
}
if t.currentSheetIndex >= workbook.NumSheets() {
@@ -129,7 +160,7 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) Next() datatable.BasicDataTabl
currentSheet := workbook.GetSheet(t.currentSheetIndex)
if t.currentRowIndexInSheet > currentSheet.MaxRow {
if t.currentRowIndexInSheet > int(currentSheet.MaxRow) || currentSheet.Row(t.currentRowIndexInSheet) == nil {
return nil
}
@@ -140,7 +171,7 @@ func (t *ExcelMSCFBFileBasicDataTableRowIterator) Next() datatable.BasicDataTabl
}
// CreateNewExcelMSCFBFileBasicDataTable returns excel (microsoft compound file binary) data table by file binary data
func CreateNewExcelMSCFBFileBasicDataTable(data []byte) (datatable.BasicDataTable, error) {
func CreateNewExcelMSCFBFileBasicDataTable(data []byte, hasTitleLine bool) (datatable.BasicDataTable, error) {
reader := bytes.NewReader(data)
workbook, err := xls.OpenReader(reader, "")
@@ -148,12 +179,12 @@ func CreateNewExcelMSCFBFileBasicDataTable(data []byte) (datatable.BasicDataTabl
return nil, err
}
var headerRowItems []string
var firstRowItems []string
for i := 0; i < workbook.NumSheets(); i++ {
sheet := workbook.GetSheet(i)
if sheet.MaxRow < 0 {
if sheet.MaxRow <= 0 && sheet.Row(0) == nil {
continue
}
@@ -171,21 +202,28 @@ func CreateNewExcelMSCFBFileBasicDataTable(data []byte) (datatable.BasicDataTabl
break
}
headerRowItems = append(headerRowItems, headerItem)
firstRowItems = append(firstRowItems, headerItem)
}
} else {
for j := 0; j <= min(row.LastCol(), len(headerRowItems)-1); j++ {
for j := 0; j <= min(row.LastCol(), len(firstRowItems)-1); j++ {
headerItem := row.Col(j)
if headerItem != headerRowItems[j] {
if headerItem != firstRowItems[j] {
return nil, errs.ErrFieldsInMultiTableAreDifferent
}
}
}
}
var headerLineColumnNames []string = nil
if hasTitleLine {
headerLineColumnNames = firstRowItems
}
return &ExcelMSCFBFileBasicDataTable{
workbook: workbook,
headerLineColumnNames: headerRowItems,
headerLineColumnNames: headerLineColumnNames,
hasTitleLine: hasTitleLine,
}, nil
}
@@ -13,7 +13,16 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Equal(t, 3, datatable.DataRowCount())
}
func TestExcelMSCFBFileBasicDataTableDataRowCount_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 2, datatable.DataRowCount())
}
@@ -22,7 +31,16 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount_MultipleSheets(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Equal(t, 9, datatable.DataRowCount())
}
func TestExcelMSCFBFileBasicDataTableDataRowCount_MultipleSheets_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 5, datatable.DataRowCount())
}
@@ -31,7 +49,7 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount_OnlyHeaderLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 0, datatable.DataRowCount())
}
@@ -40,7 +58,11 @@ func TestExcelMSCFBFileBasicDataTableDataRowCount_EmptyContent(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Equal(t, 0, datatable.DataRowCount())
datatable, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 0, datatable.DataRowCount())
}
@@ -49,7 +71,17 @@ func TestExcelMSCFBFileBasicDataTableHeaderColumnNames(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Nil(t, datatable.HeaderColumnNames())
}
func TestExcelMSCFBFileBasicDataTableHeaderColumnNames_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.EqualValues(t, []string{"A1", "B1", "C1"}, datatable.HeaderColumnNames())
}
@@ -57,15 +89,47 @@ func TestExcelMSCFBFileBasicDataTableHeaderColumnNames_EmptyContent(t *testing.T
testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Nil(t, datatable.HeaderColumnNames())
datatable, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Nil(t, datatable.HeaderColumnNames())
}
func TestExcelMSCFBFileBasicDataTableRowIterator(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowIterator(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
// data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// data row 3
assert.NotNil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row 4
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
}
func TestExcelMSCFBFileBasicDataRowIterator_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
@@ -86,11 +150,66 @@ func TestExcelMSCFBFileBasicDataTableRowIterator(t *testing.T) {
assert.False(t, iterator.HasNext())
}
func TestExcelMSCFBFileBasicDataTableRowIterator_MultipleSheets(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowIterator_MultipleSheets(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
// sheet 1 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 1 data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 1 data row 3
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 3 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 3 data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 4 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 5 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 5 data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 5 data row 3
assert.NotNil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
}
func TestExcelMSCFBFileBasicDataRowIterator_MultipleSheets_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
@@ -123,11 +242,12 @@ func TestExcelMSCFBFileBasicDataTableRowIterator_MultipleSheets(t *testing.T) {
assert.False(t, iterator.HasNext())
}
func TestExcelMSCFBFileBasicDataTableRowIterator_OnlyHeaderLine(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowIterator_OnlyHeaderLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.False(t, iterator.HasNext())
@@ -140,11 +260,12 @@ func TestExcelMSCFBFileBasicDataTableRowIterator_OnlyHeaderLine(t *testing.T) {
assert.False(t, iterator.HasNext())
}
func TestExcelMSCFBFileBasicDataTableRowIterator_EmptyContent(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowIterator_EmptyContent(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.False(t, iterator.HasNext())
@@ -155,13 +276,27 @@ func TestExcelMSCFBFileBasicDataTableRowIterator_EmptyContent(t *testing.T) {
// not existed data row 2
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
datatable, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator = datatable.DataRowIterator()
assert.False(t, iterator.HasNext())
// not existed data row 1
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row 2
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
}
func TestExcelMSCFBFileBasicDataTableRowColumnCount(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowColumnCount(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
@@ -171,11 +306,36 @@ func TestExcelMSCFBFileBasicDataTableRowColumnCount(t *testing.T) {
assert.EqualValues(t, 4, row2.ColumnCount())
}
func TestExcelMSCFBFileBasicDataTableRowGetData(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowGetData(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
assert.Equal(t, "A1", row1.GetData(0))
assert.Equal(t, "B1", row1.GetData(1))
assert.Equal(t, "C1", row1.GetData(2))
row2 := iterator.Next()
assert.Equal(t, "A2", row2.GetData(0))
assert.Equal(t, "B2", row2.GetData(1))
assert.Equal(t, "C2", row2.GetData(2))
row3 := iterator.Next()
assert.Equal(t, "A3", row3.GetData(0))
assert.Equal(t, "B3", row3.GetData(1))
assert.Equal(t, "C3", row3.GetData(2))
}
func TestExcelMSCFBFileBasicDataRowGetData_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
@@ -189,22 +349,80 @@ func TestExcelMSCFBFileBasicDataTableRowGetData(t *testing.T) {
assert.Equal(t, "C3", row2.GetData(2))
}
func TestExcelMSCFBFileBasicDataTableRowGetData_GetNotExistedColumnData(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowGetData_GetNotExistedColumnData(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
assert.Equal(t, "", row1.GetData(3))
}
func TestExcelMSCFBFileBasicDataTableRowGetData_MultipleSheets(t *testing.T) {
func TestExcelMSCFBFileBasicDataRowGetData_MultipleSheets(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
sheet1Row1 := iterator.Next()
assert.Equal(t, "A1", sheet1Row1.GetData(0))
assert.Equal(t, "B1", sheet1Row1.GetData(1))
assert.Equal(t, "C1", sheet1Row1.GetData(2))
sheet1Row2 := iterator.Next()
assert.Equal(t, "1-A2", sheet1Row2.GetData(0))
assert.Equal(t, "1-B2", sheet1Row2.GetData(1))
assert.Equal(t, "1-C2", sheet1Row2.GetData(2))
sheet1Row3 := iterator.Next()
assert.Equal(t, "1-A3", sheet1Row3.GetData(0))
assert.Equal(t, "1-B3", sheet1Row3.GetData(1))
assert.Equal(t, "1-C3", sheet1Row3.GetData(2))
// skip empty sheet2
sheet3Row1 := iterator.Next()
assert.Equal(t, "A1", sheet3Row1.GetData(0))
assert.Equal(t, "B1", sheet3Row1.GetData(1))
assert.Equal(t, "C1", sheet3Row1.GetData(2))
sheet3Row2 := iterator.Next()
assert.Equal(t, "3-A2", sheet3Row2.GetData(0))
assert.Equal(t, "3-B2", sheet3Row2.GetData(1))
assert.Equal(t, "", sheet3Row2.GetData(2))
sheet4Row1 := iterator.Next()
assert.Equal(t, "A1", sheet4Row1.GetData(0))
assert.Equal(t, "B1", sheet4Row1.GetData(1))
assert.Equal(t, "C1", sheet4Row1.GetData(2))
sheet5Row1 := iterator.Next()
assert.Equal(t, "A1", sheet5Row1.GetData(0))
assert.Equal(t, "B1", sheet5Row1.GetData(1))
assert.Equal(t, "C1", sheet5Row1.GetData(2))
sheet5Row2 := iterator.Next()
assert.Equal(t, "5-A2", sheet5Row2.GetData(0))
assert.Equal(t, "5-B2", sheet5Row2.GetData(1))
assert.Equal(t, "5-C2", sheet5Row2.GetData(2))
sheet5Row3 := iterator.Next()
assert.Equal(t, "5-A3", sheet5Row3.GetData(0))
assert.Equal(t, "5-B3", sheet5Row3.GetData(1))
assert.Equal(t, "5-C3", sheet5Row3.GetData(2))
}
func TestExcelMSCFBFileBasicDataRowGetData_MultipleSheets_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xls")
assert.Nil(t, err)
datatable, err := CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
sheet1Row1 := iterator.Next()
@@ -241,6 +459,6 @@ func TestCreateNewExcelMSCFBFileBasicDataTable_MultipleSheetsWithDifferentHeader
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_with_different_header_row_excel_file.xls")
assert.Nil(t, err)
_, err = CreateNewExcelMSCFBFileBasicDataTable(testdata)
_, err = CreateNewExcelMSCFBFileBasicDataTable(testdata, true)
assert.EqualError(t, err, errs.ErrFieldsInMultiTableAreDifferent.Message)
}
@@ -20,6 +20,7 @@ type excelOOXMLSheet struct {
type ExcelOOXMLFileBasicDataTable struct {
sheets []*excelOOXMLSheet
headerLineColumnNames []string
hasTitleLine bool
}
// ExcelOOXMLFileBasicDataTableRow defines the structure of excel (Office Open XML) file data table row
@@ -47,7 +48,11 @@ func (t *ExcelOOXMLFileBasicDataTable) DataRowCount() int {
continue
}
totalDataRowCount += len(sheet.allData) - 1
if t.hasTitleLine {
totalDataRowCount += len(sheet.allData) - 1
} else {
totalDataRowCount += len(sheet.allData)
}
}
return totalDataRowCount
@@ -55,15 +60,25 @@ func (t *ExcelOOXMLFileBasicDataTable) DataRowCount() int {
// HeaderColumnNames returns the header column name list
func (t *ExcelOOXMLFileBasicDataTable) HeaderColumnNames() []string {
if !t.hasTitleLine {
return nil
}
return t.headerLineColumnNames
}
// DataRowIterator returns the iterator of data row
func (t *ExcelOOXMLFileBasicDataTable) DataRowIterator() datatable.BasicDataTableRowIterator {
startIndex := -1
if t.hasTitleLine {
startIndex = 0
}
return &ExcelOOXMLFileBasicDataTableRowIterator{
dataTable: t,
currentSheetIndex: 0,
currentRowIndexInSheet: 0,
currentRowIndexInSheet: startIndex,
}
}
@@ -98,8 +113,14 @@ func (t *ExcelOOXMLFileBasicDataTableRowIterator) HasNext() bool {
for i := t.currentSheetIndex + 1; i < len(sheets); i++ {
sheet := sheets[i]
if len(sheet.allData) <= 1 {
continue
if t.dataTable.hasTitleLine {
if len(sheet.allData) <= 1 {
continue
}
} else {
if len(sheet.allData) <= 0 {
continue
}
}
return true
@@ -116,20 +137,22 @@ func (t *ExcelOOXMLFileBasicDataTableRowIterator) CurrentRowId() string {
// Next returns the next basic data row
func (t *ExcelOOXMLFileBasicDataTableRowIterator) Next() datatable.BasicDataTableRow {
sheets := t.dataTable.sheets
currentRowIndexInTable := t.currentRowIndexInSheet
for i := t.currentSheetIndex; i < len(sheets); i++ {
sheet := sheets[i]
if currentRowIndexInTable+1 < len(sheet.allData) {
if t.currentRowIndexInSheet+1 < len(sheet.allData) {
t.currentRowIndexInSheet++
currentRowIndexInTable = t.currentRowIndexInSheet
break
}
t.currentSheetIndex++
t.currentRowIndexInSheet = 0
currentRowIndexInTable = 0
if t.dataTable.hasTitleLine {
t.currentRowIndexInSheet = 0
} else {
t.currentRowIndexInSheet = -1
}
}
if t.currentSheetIndex >= len(sheets) {
@@ -150,7 +173,7 @@ func (t *ExcelOOXMLFileBasicDataTableRowIterator) Next() datatable.BasicDataTabl
}
// CreateNewExcelOOXMLFileBasicDataTable returns excel (Office Open XML) data table by file binary data
func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTable, error) {
func CreateNewExcelOOXMLFileBasicDataTable(data []byte, hasTitleLine bool) (datatable.BasicDataTable, error) {
reader := bytes.NewReader(data)
file, err := excelize.OpenReader(reader)
@@ -161,7 +184,7 @@ func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTabl
}
sheetNames := file.GetSheetList()
var headerRowItems []string
var firstRowItems []string
var sheets []*excelOOXMLSheet
for i := 0; i < len(sheetNames); i++ {
@@ -186,13 +209,13 @@ func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTabl
break
}
headerRowItems = append(headerRowItems, headerItem)
firstRowItems = append(firstRowItems, headerItem)
}
} else {
for j := 0; j < min(len(row), len(headerRowItems)); j++ {
for j := 0; j < min(len(row), len(firstRowItems)); j++ {
headerItem := row[j]
if headerItem != headerRowItems[j] {
if headerItem != firstRowItems[j] {
return nil, errs.ErrFieldsInMultiTableAreDifferent
}
}
@@ -204,8 +227,15 @@ func CreateNewExcelOOXMLFileBasicDataTable(data []byte) (datatable.BasicDataTabl
})
}
var headerLineColumnNames []string = nil
if hasTitleLine {
headerLineColumnNames = firstRowItems
}
return &ExcelOOXMLFileBasicDataTable{
sheets: sheets,
headerLineColumnNames: headerRowItems,
headerLineColumnNames: headerLineColumnNames,
hasTitleLine: hasTitleLine,
}, nil
}
@@ -13,7 +13,16 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Equal(t, 3, datatable.DataRowCount())
}
func TestExcelOOXMLFileBasicDataTableDataRowCount_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 2, datatable.DataRowCount())
}
@@ -22,7 +31,16 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount_MultipleSheets(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Equal(t, 9, datatable.DataRowCount())
}
func TestExcelOOXMLFileBasicDataTableDataRowCount_MultipleSheets_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 5, datatable.DataRowCount())
}
@@ -31,7 +49,7 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount_OnlyHeaderLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 0, datatable.DataRowCount())
}
@@ -40,7 +58,11 @@ func TestExcelOOXMLFileBasicDataTableDataRowCount_EmptyContent(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Equal(t, 0, datatable.DataRowCount())
datatable, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Equal(t, 0, datatable.DataRowCount())
}
@@ -49,7 +71,17 @@ func TestExcelOOXMLFileBasicDataTableHeaderColumnNames(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Nil(t, datatable.HeaderColumnNames())
}
func TestExcelOOXMLFileBasicDataTableHeaderColumnNames_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.EqualValues(t, []string{"A1", "B1", "C1"}, datatable.HeaderColumnNames())
}
@@ -57,7 +89,12 @@ func TestExcelOOXMLFileBasicDataTableHeaderColumnNames_EmptyContent(t *testing.T
testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
assert.Nil(t, datatable.HeaderColumnNames())
datatable, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
assert.Nil(t, datatable.HeaderColumnNames())
}
@@ -65,7 +102,34 @@ func TestExcelOOXMLFileBasicDataRowIterator(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
// data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// data row 3
assert.NotNil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row 4
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
}
func TestExcelOOXMLFileBasicDataRowIterator_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
@@ -90,7 +154,62 @@ func TestExcelOOXMLFileBasicDataRowIterator_MultipleSheets(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
// sheet 1 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 1 data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 1 data row 3
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 3 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 3 data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 4 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 5 data row 1
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 5 data row 2
assert.NotNil(t, iterator.Next())
assert.True(t, iterator.HasNext())
// sheet 5 data row 3
assert.NotNil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
}
func TestExcelOOXMLFileBasicDataRowIterator_MultipleSheets_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.True(t, iterator.HasNext())
@@ -127,7 +246,8 @@ func TestExcelOOXMLFileBasicDataRowIterator_OnlyHeaderLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/only_one_row_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.False(t, iterator.HasNext())
@@ -144,7 +264,8 @@ func TestExcelOOXMLFileBasicDataRowIterator_EmptyContent(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/empty_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
assert.False(t, iterator.HasNext())
@@ -155,13 +276,27 @@ func TestExcelOOXMLFileBasicDataRowIterator_EmptyContent(t *testing.T) {
// not existed data row 2
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
datatable, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator = datatable.DataRowIterator()
assert.False(t, iterator.HasNext())
// not existed data row 1
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
// not existed data row 2
assert.Nil(t, iterator.Next())
assert.False(t, iterator.HasNext())
}
func TestExcelOOXMLFileBasicDataRowColumnCount(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
@@ -175,7 +310,32 @@ func TestExcelOOXMLFileBasicDataRowGetData(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
assert.Equal(t, "A1", row1.GetData(0))
assert.Equal(t, "B1", row1.GetData(1))
assert.Equal(t, "C1", row1.GetData(2))
row2 := iterator.Next()
assert.Equal(t, "A2", row2.GetData(0))
assert.Equal(t, "B2", row2.GetData(1))
assert.Equal(t, "C2", row2.GetData(2))
row3 := iterator.Next()
assert.Equal(t, "A3", row3.GetData(0))
assert.Equal(t, "B3", row3.GetData(1))
assert.Equal(t, "C3", row3.GetData(2))
}
func TestExcelOOXMLFileBasicDataRowGetData_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
@@ -193,7 +353,8 @@ func TestExcelOOXMLFileBasicDataRowGetData_GetNotExistedColumnData(t *testing.T)
testdata, err := os.ReadFile("../../../testdata/simple_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
row1 := iterator.Next()
@@ -204,7 +365,64 @@ func TestExcelOOXMLFileBasicDataRowGetData_MultipleSheets(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, false)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
sheet1Row1 := iterator.Next()
assert.Equal(t, "A1", sheet1Row1.GetData(0))
assert.Equal(t, "B1", sheet1Row1.GetData(1))
assert.Equal(t, "C1", sheet1Row1.GetData(2))
sheet1Row2 := iterator.Next()
assert.Equal(t, "1-A2", sheet1Row2.GetData(0))
assert.Equal(t, "1-B2", sheet1Row2.GetData(1))
assert.Equal(t, "1-C2", sheet1Row2.GetData(2))
sheet1Row3 := iterator.Next()
assert.Equal(t, "1-A3", sheet1Row3.GetData(0))
assert.Equal(t, "1-B3", sheet1Row3.GetData(1))
assert.Equal(t, "1-C3", sheet1Row3.GetData(2))
// skip empty sheet2
sheet3Row1 := iterator.Next()
assert.Equal(t, "A1", sheet3Row1.GetData(0))
assert.Equal(t, "B1", sheet3Row1.GetData(1))
assert.Equal(t, "C1", sheet3Row1.GetData(2))
sheet3Row2 := iterator.Next()
assert.Equal(t, "3-A2", sheet3Row2.GetData(0))
assert.Equal(t, "3-B2", sheet3Row2.GetData(1))
assert.Equal(t, "", sheet3Row2.GetData(2))
sheet4Row1 := iterator.Next()
assert.Equal(t, "A1", sheet4Row1.GetData(0))
assert.Equal(t, "B1", sheet4Row1.GetData(1))
assert.Equal(t, "C1", sheet4Row1.GetData(2))
sheet5Row1 := iterator.Next()
assert.Equal(t, "A1", sheet5Row1.GetData(0))
assert.Equal(t, "B1", sheet5Row1.GetData(1))
assert.Equal(t, "C1", sheet5Row1.GetData(2))
sheet5Row2 := iterator.Next()
assert.Equal(t, "5-A2", sheet5Row2.GetData(0))
assert.Equal(t, "5-B2", sheet5Row2.GetData(1))
assert.Equal(t, "5-C2", sheet5Row2.GetData(2))
sheet5Row3 := iterator.Next()
assert.Equal(t, "5-A3", sheet5Row3.GetData(0))
assert.Equal(t, "5-B3", sheet5Row3.GetData(1))
assert.Equal(t, "5-C3", sheet5Row3.GetData(2))
}
func TestExcelOOXMLFileBasicDataRowGetData_MultipleSheets_HasTitleLine(t *testing.T) {
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_excel_file.xlsx")
assert.Nil(t, err)
datatable, err := CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.Nil(t, err)
iterator := datatable.DataRowIterator()
sheet1Row1 := iterator.Next()
@@ -241,6 +459,6 @@ func TestCreateNewExcelOOXMLFileBasicDataTable_MultipleSheetsWithDifferentHeader
testdata, err := os.ReadFile("../../../testdata/multiple_sheets_with_different_header_row_excel_file.xlsx")
assert.Nil(t, err)
_, err = CreateNewExcelOOXMLFileBasicDataTable(testdata)
_, err = CreateNewExcelOOXMLFileBasicDataTable(testdata, true)
assert.EqualError(t, err, errs.ErrFieldsInMultiTableAreDifferent.Message)
}