Files
ezbookkeeping/pkg/converters/ofx/ofx_data_reader.go
T

330 lines
9.4 KiB
Go

package ofx
import (
"bufio"
"bytes"
"encoding/xml"
"io"
"regexp"
"strings"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
"github.com/mayswind/ezbookkeeping/pkg/converters/sgml"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/utils"
)
// Values recognised in the plain-text header of an ofx 1.x file
const (
	// ofx1USAsciiEncoding is the lower-cased ENCODING header value for US-ASCII / code page content
	ofx1USAsciiEncoding = "usascii"

	// ofx1UnicodeEncoding is the lower-cased ENCODING header value for unicode content
	ofx1UnicodeEncoding = "unicode"

	// ofx1UTF8Encoding is a non-standard ofx 1.x encoding, used by some banks (https://github.com/mayswind/ezbookkeeping/issues/48)
	ofx1UTF8Encoding = "utf8"

	// ofx1SGMLDataFormat is the only DATA header value accepted for ofx 1.x files
	ofx1SGMLDataFormat = "OFXSGML"
)
// Regular expressions used to parse the <?OFX ... ?> processing instruction of an ofx 2.x file
var (
	// ofx2HeaderPattern matches the whole <?OFX ... ?> declaration including all of its attributes
	ofx2HeaderPattern = regexp.MustCompile(`<\?OFX( +[A-Z]+="[^=]*")* *\?>`)

	// ofx2HeaderAttributePattern matches one NAME="value" attribute; group 1 is the name, group 2 is the value
	ofx2HeaderAttributePattern = regexp.MustCompile(` +([A-Z]+)="([^=]*)"`)
)
// ofxFileReader is the common interface of the open financial exchange (ofx) file readers,
// it is implemented by ofxVersion1FileReader and ofxVersion2FileReader
type ofxFileReader interface {
// read decodes the underlying data and returns the imported open financial exchange (ofx) file,
// or an error when the content cannot be decoded
read(ctx core.Context) (*ofxFile, error)
}
// ofxVersion1FileReader defines the structure of open financial exchange (ofx) declaration version 1.x file reader,
// the body of such a file is decoded with the sgml decoder
type ofxVersion1FileReader struct {
// fileHeader is the header parsed when the reader was created, read attaches it to the returned ofx file
fileHeader *ofxFileHeader
// sgmlDecoder is the decoder used by read to decode the file body
sgmlDecoder *sgml.Decoder
}
// ofxVersion2FileReader defines the structure of open financial exchange (ofx) declaration version 2.x file reader,
// the body of such a file is decoded with the xml decoder
type ofxVersion2FileReader struct {
// fileHeader is the header parsed when the reader was created (nil when the file has no ofx header),
// read attaches it to the returned ofx file
fileHeader *ofxFileHeader
// xmlDecoder is the decoder used by read to decode the file content
xmlDecoder *xml.Decoder
}
// read decodes the sgml content held by the reader into an ofx file and attaches the file header
// that was parsed when the reader was created. A decoding failure is logged and reported as
// errs.ErrInvalidOFXFile
// Reference: https://www.financialdataexchange.org/FDX/FDX/About/OFX-Work-Group.aspx?a315d1c24e44=2
func (r *ofxVersion1FileReader) read(ctx core.Context) (*ofxFile, error) {
	result := new(ofxFile)

	if decodeErr := r.sgmlDecoder.Decode(&result); decodeErr != nil {
		log.Errorf(ctx, "[ofxVersion1FileReader.read] cannot read ofx 1.x file, because %s", decodeErr.Error())
		return nil, errs.ErrInvalidOFXFile
	}

	result.FileHeader = r.fileHeader

	return result, nil
}
// read decodes the xml content held by the reader into an ofx file and attaches the file header
// that was parsed when the reader was created. A decoding failure is logged and reported as
// errs.ErrInvalidOFXFile
func (r *ofxVersion2FileReader) read(ctx core.Context) (*ofxFile, error) {
	result := new(ofxFile)

	if decodeErr := r.xmlDecoder.Decode(&result); decodeErr != nil {
		log.Errorf(ctx, "[ofxVersion2FileReader.read] cannot read ofx 2.x file, because %s", decodeErr.Error())
		return nil, errs.ErrInvalidOFXFile
	}

	result.FileHeader = r.fileHeader

	return result, nil
}
// createNewOFXFileReader inspects the beginning of data (ignoring leading CR / LF bytes) and returns
// the reader matching the detected file flavour:
//   - "<?xml"      -> ofx 2.x file with an ofx header
//   - "OFXHEADER:" -> ofx 1.x file
//   - "<OFX>"      -> xml content without any ofx header
//
// errs.ErrInvalidOFXFile is returned when none of these prefixes is present.
func createNewOFXFileReader(ctx core.Context, data []byte) (ofxFileReader, error) {
	// Prefix checks are done on the trimmed view so that they can never slice past the end of data,
	// the complete original data is still what gets handed to the version specific reader
	content := bytes.TrimLeft(data, "\r\n")

	switch {
	case bytes.HasPrefix(content, []byte("<?xml")): // ofx 2.x starts with <?xml
		return createNewOFX2FileReader(ctx, data, true)
	case bytes.HasPrefix(content, []byte("OFXHEADER:")): // ofx 1.x starts with OFXHEADER:
		return createNewOFX1FileReader(ctx, data)
	case bytes.HasPrefix(content, []byte("<OFX>")): // no ofx header
		return createNewOFX2FileReader(ctx, data, false)
	}

	return nil, errs.ErrInvalidOFXFile
}
// createNewOFX1FileReader builds a reader for an ofx 1.x file. It parses the plain-text header,
// requires declaration version ofxVersion1 and data format ofx1SGMLDataFormat, converts the body
// with the encoding announced by the header and wraps the converted text in an sgml decoder.
// Every validation or conversion failure is logged and reported as errs.ErrInvalidOFXFile
func createNewOFX1FileReader(ctx core.Context, data []byte) (ofxFileReader, error) {
	header, body, format, bodyEncoding, err := readOFX1FileHeader(ctx, data)

	if err != nil {
		return nil, err
	}

	if header.OFXDeclarationVersion != ofxVersion1 {
		log.Errorf(ctx, "[ofx_data_reader.createNewOFX1FileReader] cannot parse ofx 1.x file header, because declaration version is \"%s\"", header.OFXDeclarationVersion)
		return nil, errs.ErrInvalidOFXFile
	}

	if format != ofx1SGMLDataFormat {
		log.Errorf(ctx, "[ofx_data_reader.createNewOFX1FileReader] cannot parse ofx 1.x file header, because data type is \"%s\"", format)
		return nil, errs.ErrInvalidOFXFile
	}

	// the body is read as-is unless the header selected an encoding to convert from
	var source io.Reader = bytes.NewReader(body)

	if bodyEncoding != nil {
		source = transform.NewReader(source, bodyEncoding.NewDecoder())
	}

	// the whole body is converted eagerly, so conversion errors surface here instead of in read
	var decoded bytes.Buffer

	if _, err = decoded.ReadFrom(source); err != nil {
		log.Errorf(ctx, "[ofx_data_reader.createNewOFX1FileReader] cannot read ofx 1.x file content, because %s", err.Error())
		return nil, errs.ErrInvalidOFXFile
	}

	return &ofxVersion1FileReader{
		fileHeader:  header,
		sgmlDecoder: sgml.NewDecoder(strings.NewReader(decoded.String())),
	}, nil
}
// createNewOFX2FileReader builds a reader for xml based ofx content. When withHeader is true the
// <?OFX ... ?> declaration is parsed first and must announce declaration version ofxVersion2,
// otherwise the reader is created without a file header. The xml decoder always receives the
// complete data and resolves non UTF-8 charsets through charset.NewReaderLabel
func createNewOFX2FileReader(ctx core.Context, data []byte, withHeader bool) (ofxFileReader, error) {
	var header *ofxFileHeader

	if withHeader {
		parsedHeader, err := readOFX2FileHeader(ctx, data)

		if err != nil {
			return nil, err
		}

		if parsedHeader.OFXDeclarationVersion != ofxVersion2 {
			log.Errorf(ctx, "[ofx_data_reader.createNewOFX2FileReader] cannot parse ofx 2.x file header, because declaration version is \"%s\"", parsedHeader.OFXDeclarationVersion)
			return nil, errs.ErrInvalidOFXFile
		}

		header = parsedHeader
	}

	decoder := xml.NewDecoder(bytes.NewReader(data))
	decoder.CharsetReader = charset.NewReaderLabel

	return &ofxVersion2FileReader{
		fileHeader: header,
		xmlDecoder: decoder,
	}, nil
}
// readOFX1FileHeader parses the plain-text "KEY:VALUE" header lines placed in front of the body of
// an ofx 1.x file.
//
// It returns the parsed header, the bytes of data starting at the first line that begins with
// "<OFX>" (all of data when no such line exists), the value of the DATA header and the encoding
// selected from the ENCODING / CHARSET headers. Lines that are not exactly one KEY:VALUE pair or
// whose key is unknown are logged as warnings and skipped. errs.ErrInvalidOFXFile is returned when
// the ENCODING header is missing or has an unsupported value.
func readOFX1FileHeader(ctx core.Context, data []byte) (fileHeader *ofxFileHeader, fileData []byte, dataType string, enc encoding.Encoding, err error) {
	fileHeader = &ofxFileHeader{}
	fileEncoding := ""
	fileCharset := ""
	fileDataStartPosition := 0
	lastCrLf := -1

	// i == len(data) is treated as a virtual line break, so the last line is examined even when the
	// file does not end with CR / LF (e.g. the whole "<OFX>...</OFX>" body on a final unterminated line)
	for i := 0; i <= len(data); i++ {
		if i < len(data) && data[i] != '\n' && data[i] != '\r' {
			continue
		}

		// consecutive line break bytes (CRLF or blank lines) delimit nothing
		if lastCrLf == i-1 {
			lastCrLf = i
			continue
		}

		// line is never empty here, because lastCrLf+1 < i
		line := string(data[lastCrLf+1 : i])

		if strings.HasPrefix(line, "<OFX>") {
			fileDataStartPosition = lastCrLf + 1
			break
		}

		lastCrLf = i

		items := strings.Split(line, ":")

		if len(items) != 2 {
			log.Warnf(ctx, "[ofx_data_reader.readOFX1FileHeader] cannot parse line in ofx 1.x file header, because line is \"%s\"", line)
			continue
		}

		key, value := items[0], items[1]

		switch key {
		case "OFXHEADER":
			fileHeader.OFXDeclarationVersion = oFXDeclarationVersion(value)
		case "DATA":
			dataType = value
		case "VERSION":
			fileHeader.OFXDataVersion = value
		case "SECURITY":
			fileHeader.Security = value
		case "ENCODING":
			fileEncoding = strings.ToLower(value)
		case "CHARSET":
			fileCharset = strings.ToLower(value)
		case "COMPRESSION":
			// ignore
		case "OLDFILEUID":
			fileHeader.OldFileUid = value
		case "NEWFILEUID":
			fileHeader.NewFileUid = value
		default:
			log.Warnf(ctx, "[ofx_data_reader.readOFX1FileHeader] cannot parse unknown header line in ofx 1.x file header, because line is \"%s\"", line)
		}
	}

	switch fileEncoding {
	case ofx1USAsciiEncoding:
		// a CHARSET made only of digits (e.g. "1252") is looked up with a "cp" prefix ("cp1252")
		if utils.IsStringOnlyContainsDigits(fileCharset) {
			fileCharset = "cp" + fileCharset
		}

		// fall back step by step when the charset label is not recognised
		enc, _ = charset.Lookup(fileCharset)

		if enc == nil {
			enc, _ = charset.Lookup("us-ascii")
		}

		if enc == nil {
			enc = charmap.Windows1252
		}
	case ofx1UnicodeEncoding:
		enc, _ = charset.Lookup(ofx1UnicodeEncoding)

		if enc == nil {
			enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
		}
	case ofx1UTF8Encoding:
		enc, _ = charset.Lookup(ofx1UTF8Encoding)

		if enc == nil {
			enc = unicode.UTF8
		}
	default:
		log.Errorf(ctx, "[ofx_data_reader.readOFX1FileHeader] cannot parse ofx 1.x file, because encoding \"%s\" is unknown", fileEncoding)
		return nil, nil, "", nil, errs.ErrInvalidOFXFile
	}

	return fileHeader, data[fileDataStartPosition:], dataType, enc, nil
}
// readOFX2FileHeader searches data line by line for the first line containing "<?OFX " and parses
// the NAME="value" attributes of the <?OFX ... ?> declaration found on that line into a file header.
// Unknown attributes are logged as warnings and skipped. errs.ErrInvalidOFXFile is returned when no
// declaration can be extracted
func readOFX2FileHeader(ctx core.Context, data []byte) (fileHeader *ofxFileHeader, err error) {
	lineReader := bufio.NewReader(bytes.NewReader(data))
	declaration := ""

	for {
		text, readErr := lineReader.ReadString('\n')

		// the text is inspected before the error, because the last line is returned together with io.EOF
		if strings.Contains(text, "<?OFX ") {
			declaration = ofx2HeaderPattern.FindString(text)
			break
		}

		if readErr == nil {
			continue
		}

		if readErr != io.EOF {
			log.Errorf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot read ofx 2.x file, because %s", readErr.Error())
			return nil, errs.ErrInvalidOFXFile
		}

		break
	}

	if declaration == "" {
		log.Errorf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot find ofx 2.x file header")
		return nil, errs.ErrInvalidOFXFile
	}

	header := &ofxFileHeader{}

	for _, match := range ofx2HeaderAttributePattern.FindAllStringSubmatch(declaration, -1) {
		// every match is expected to hold the full text, the attribute name and the attribute value
		if len(match) != 3 {
			log.Warnf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot parse line in ofx 2.x file header, because item is \"%s\"", match)
			continue
		}

		switch attrName, attrValue := match[1], match[2]; attrName {
		case "OFXHEADER":
			header.OFXDeclarationVersion = oFXDeclarationVersion(attrValue)
		case "VERSION":
			header.OFXDataVersion = attrValue
		case "SECURITY":
			header.Security = attrValue
		case "OLDFILEUID":
			header.OldFileUid = attrValue
		case "NEWFILEUID":
			header.NewFileUid = attrValue
		default:
			log.Warnf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot parse unknown header line in ofx 2.x file header, because item is \"%s\"", match)
		}
	}

	return header, nil
}