package ofx

import (
	"bufio"
	"bytes"
	"encoding/xml"
	"io"
	"regexp"
	"strings"

	"golang.org/x/net/html/charset"
	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/charmap"
	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"

	"github.com/mayswind/ezbookkeeping/pkg/converters/sgml"
	"github.com/mayswind/ezbookkeeping/pkg/core"
	"github.com/mayswind/ezbookkeeping/pkg/errs"
	"github.com/mayswind/ezbookkeeping/pkg/log"
	"github.com/mayswind/ezbookkeeping/pkg/utils"
)

// Values of the ENCODING line of an ofx 1.x file header, compared after lower-casing.
const (
	ofx1USAsciiEncoding = "usascii"
	ofx1UnicodeEncoding = "unicode"
	ofx1UTF8Encoding    = "utf-8" // non-standard ofx 1.x encoding, used by some banks (https://github.com/mayswind/ezbookkeeping/issues/48)
)

// ofx1SGMLDataFormat is the only value of the DATA header line accepted for ofx 1.x files
const ofx1SGMLDataFormat = "OFXSGML"

// ofx2HeaderPattern matches the whole ofx 2.x processing instruction,
// ofx2HeaderAttributePattern matches every NAME="value" pair inside it
var ofx2HeaderPattern = regexp.MustCompile("<\\?OFX( +[A-Z]+=\"[^=]*\")* *\\?>")
var ofx2HeaderAttributePattern = regexp.MustCompile(" +([A-Z]+)=\"([^=]*)\"")

// ofxFileReader defines the structure of open financial exchange (ofx) file reader
type ofxFileReader interface {
	// read returns the imported open financial exchange (ofx) file
	read(ctx core.Context) (*ofxFile, error)
}

// ofxVersion1FileReader defines the structure of open financial exchange (ofx) declaration version 1.x file reader
type ofxVersion1FileReader struct {
	fileHeader  *ofxFileHeader
	sgmlDecoder *sgml.Decoder
}

// ofxVersion2FileReader defines the structure of open financial exchange (ofx) declaration version 2.x file reader
type ofxVersion2FileReader struct {
	fileHeader *ofxFileHeader
	xmlDecoder *xml.Decoder
}

// read returns the imported open financial exchange (ofx) file
// Reference: https://www.financialdataexchange.org/FDX/FDX/About/OFX-Work-Group.aspx?a315d1c24e44=2
//
// Any decoding failure is logged and reported as errs.ErrInvalidOFXFile.
func (r *ofxVersion1FileReader) read(ctx core.Context) (*ofxFile, error) {
	file := &ofxFile{}

	if err := r.sgmlDecoder.Decode(&file); err != nil {
		log.Errorf(ctx, "[ofxVersion1FileReader.read] cannot read ofx 1.x file, because %s", err.Error())
		return nil, errs.ErrInvalidOFXFile
	}

	file.FileHeader = r.fileHeader

	return file, nil
}

// read returns the imported open financial exchange (ofx) file
//
// Any decoding failure is logged and reported as errs.ErrInvalidOFXFile.
func (r *ofxVersion2FileReader) read(ctx core.Context) (*ofxFile, error) {
	file := &ofxFile{}

	if err := r.xmlDecoder.Decode(&file); err != nil {
		log.Errorf(ctx, "[ofxVersion2FileReader.read] cannot read ofx 2.x file, because %s", err.Error())
		return nil, errs.ErrInvalidOFXFile
	}

	file.FileHeader = r.fileHeader

	return file, nil
}

// createNewOFXFileReader inspects the first bytes of data (ignoring leading CR / LF)
// and returns the reader matching the detected ofx flavour, or errs.ErrInvalidOFXFile
// when none of the known prefixes is found.
//
// NOTE(review): the "<?xml" and "<OFX>" literals below were missing from the reviewed
// copy of this file (it looked tag-stripped) and were reconstructed from the
// surrounding code - confirm against version control.
func createNewOFXFileReader(ctx core.Context, data []byte) (ofxFileReader, error) {
	// Compare against the trimmed view instead of slicing data at a fixed width:
	// a fixed-width slice runs past the end of data when leading line breaks
	// push the window beyond len(data).
	content := bytes.TrimLeft(data, "\r\n")

	switch {
	case bytes.HasPrefix(content, []byte("<?xml")): // ofx 2.x starts with <?xml
		return createNewOFX2FileReader(ctx, data, true)
	case bytes.HasPrefix(content, []byte("OFXHEADER:")): // ofx 1.x starts with OFXHEADER:
		return createNewOFX1FileReader(ctx, data)
	case bytes.HasPrefix(content, []byte("<OFX>")): // no ofx header
		return createNewOFX2FileReader(ctx, data, false)
	}

	return nil, errs.ErrInvalidOFXFile
}

// createNewOFX1FileReader parses the ofx 1.x header of data, validates the declaration
// version and the data type, converts the body with the encoding chosen by
// readOFX1FileHeader and returns a reader wrapping an sgml decoder over the result.
func createNewOFX1FileReader(ctx core.Context, data []byte) (ofxFileReader, error) {
	fileHeader, fileData, dataType, enc, err := readOFX1FileHeader(ctx, data)

	if err != nil {
		return nil, err
	}

	if fileHeader.OFXDeclarationVersion != ofxVersion1 {
		log.Errorf(ctx, "[ofx_data_reader.createNewOFX1FileReader] cannot parse ofx 1.x file header, because declaration version is \"%s\"", fileHeader.OFXDeclarationVersion)
		return nil, errs.ErrInvalidOFXFile
	}

	if dataType != ofx1SGMLDataFormat {
		log.Errorf(ctx, "[ofx_data_reader.createNewOFX1FileReader] cannot parse ofx 1.x file header, because data type is \"%s\"", dataType)
		return nil, errs.ErrInvalidOFXFile
	}

	// Decode the body into a buffer, going through the charset decoder when one was selected.
	var source io.Reader = bytes.NewReader(fileData)

	if enc != nil {
		source = transform.NewReader(source, enc.NewDecoder())
	}

	buffer := &bytes.Buffer{}

	if _, err = buffer.ReadFrom(source); err != nil {
		log.Errorf(ctx, "[ofx_data_reader.createNewOFX1FileReader] cannot read ofx 1.x file content, because %s", err.Error())
		return nil, errs.ErrInvalidOFXFile
	}

	sgmlDecoder := sgml.NewDecoder(strings.NewReader(buffer.String()))

	return &ofxVersion1FileReader{
		fileHeader:  fileHeader,
		sgmlDecoder: sgmlDecoder,
	}, nil
}

// createNewOFX2FileReader returns a reader wrapping an xml decoder over data.
// When withHeader is true the ofx 2.x processing instruction is parsed first and
// its declaration version must equal ofxVersion2; otherwise the file header stays nil.
func createNewOFX2FileReader(ctx core.Context, data []byte, withHeader bool) (ofxFileReader, error) {
	var fileHeader *ofxFileHeader

	if withHeader {
		var err error
		fileHeader, err = readOFX2FileHeader(ctx, data)

		if err != nil {
			return nil, err
		}

		if fileHeader.OFXDeclarationVersion != ofxVersion2 {
			log.Errorf(ctx, "[ofx_data_reader.createNewOFX2FileReader] cannot parse ofx 2.x file header, because declaration version is \"%s\"", fileHeader.OFXDeclarationVersion)
			return nil, errs.ErrInvalidOFXFile
		}
	}

	xmlDecoder := xml.NewDecoder(bytes.NewReader(data))
	xmlDecoder.CharsetReader = charset.NewReaderLabel

	return &ofxVersion2FileReader{
		fileHeader: fileHeader,
		xmlDecoder: xmlDecoder,
	}, nil
}

// readOFX1FileHeader scans the "KEY:VALUE" lines at the start of data until the line
// that starts with the body marker, and returns the parsed header, the remaining bytes
// starting at that marker, the value of the DATA line and the text encoding selected
// from the ENCODING / CHARSET lines. An unknown ENCODING value is logged and reported
// as errs.ErrInvalidOFXFile. If no body marker is found, fileData is the whole input.
//
// NOTE(review): the "<OFX>" marker literal was missing from the reviewed copy of this
// file and was reconstructed - confirm against version control.
func readOFX1FileHeader(ctx core.Context, data []byte) (fileHeader *ofxFileHeader, fileData []byte, dataType string, enc encoding.Encoding, err error) {
	fileHeader = &ofxFileHeader{}
	fileEncoding := ""
	fileCharset := ""
	fileDataStartPosition := 0
	bodyFound := false
	lastCrLf := -1

	for i := 0; i < len(data); i++ {
		if data[i] != '\n' && data[i] != '\r' {
			continue
		}

		// two adjacent line-break bytes (CRLF or a blank line): nothing in between
		if lastCrLf == i-1 {
			lastCrLf = i
			continue
		}

		// line is never empty here, because lastCrLf+1 < i
		line := string(data[lastCrLf+1 : i])

		if strings.HasPrefix(line, "<OFX>") {
			fileDataStartPosition = lastCrLf + 1
			bodyFound = true
			break
		}

		lastCrLf = i
		items := strings.Split(line, ":")

		if len(items) != 2 {
			log.Warnf(ctx, "[ofx_data_reader.readOFX1FileHeader] cannot parse line in ofx 1.x file header, because line is \"%s\"", line)
			continue
		}

		key, value := items[0], items[1]

		switch key {
		case "OFXHEADER":
			fileHeader.OFXDeclarationVersion = oFXDeclarationVersion(value)
		case "DATA":
			dataType = value
		case "VERSION":
			fileHeader.OFXDataVersion = value
		case "SECURITY":
			fileHeader.Security = value
		case "ENCODING":
			fileEncoding = strings.ToLower(value)
		case "CHARSET":
			fileCharset = strings.ToLower(value)
		case "COMPRESSION":
			// ignore
		case "OLDFILEUID":
			fileHeader.OldFileUid = value
		case "NEWFILEUID":
			fileHeader.NewFileUid = value
		default:
			log.Warnf(ctx, "[ofx_data_reader.readOFX1FileHeader] cannot parse unknown header line in ofx 1.x file header, because line is \"%s\"", line)
		}
	}

	// The loop only looks at lines terminated by a line break, so a body that sits on
	// the final, unterminated line has to be detected separately.
	if !bodyFound && bytes.HasPrefix(data[lastCrLf+1:], []byte("<OFX>")) {
		fileDataStartPosition = lastCrLf + 1
	}

	// charset.Lookup returns a nil encoding for labels it does not know,
	// so every branch falls back to a fixed encoding.
	switch fileEncoding {
	case ofx1USAsciiEncoding:
		if utils.IsStringOnlyContainsDigits(fileCharset) {
			fileCharset = "cp" + fileCharset
		}

		enc, _ = charset.Lookup(fileCharset)

		if enc == nil {
			enc, _ = charset.Lookup("us-ascii")
		}

		if enc == nil {
			enc = charmap.Windows1252
		}
	case ofx1UnicodeEncoding:
		enc, _ = charset.Lookup(ofx1UnicodeEncoding)

		if enc == nil {
			enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
		}
	case ofx1UTF8Encoding:
		enc, _ = charset.Lookup(ofx1UTF8Encoding)

		if enc == nil {
			enc = unicode.UTF8
		}
	default:
		log.Errorf(ctx, "[ofx_data_reader.readOFX1FileHeader] cannot parse ofx 1.x file, because encoding \"%s\" is unknown", fileEncoding)
		return nil, nil, "", nil, errs.ErrInvalidOFXFile
	}

	return fileHeader, data[fileDataStartPosition:], dataType, enc, nil
}

// readOFX2FileHeader looks for the first line of data containing the ofx 2.x processing
// instruction and returns the header built from its NAME="value" attributes. A missing
// instruction or a read failure is logged and reported as errs.ErrInvalidOFXFile.
//
// NOTE(review): the "<?OFX " search literal was missing from the reviewed copy of this
// file and was reconstructed - confirm against version control.
func readOFX2FileHeader(ctx core.Context, data []byte) (fileHeader *ofxFileHeader, err error) {
	bufReader := bufio.NewReader(bytes.NewReader(data))
	headerLine := ""

	for {
		// ReadString may return the final unterminated line together with io.EOF,
		// so the line is inspected before the error.
		line, readErr := bufReader.ReadString('\n')

		if strings.Contains(line, "<?OFX ") {
			headerLine = ofx2HeaderPattern.FindString(line)
			break
		}

		if readErr == io.EOF {
			break
		}

		if readErr != nil {
			log.Errorf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot read ofx 2.x file, because %s", readErr.Error())
			return nil, errs.ErrInvalidOFXFile
		}
	}

	if headerLine == "" {
		log.Errorf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot find ofx 2.x file header")
		return nil, errs.ErrInvalidOFXFile
	}

	fileHeader = &ofxFileHeader{}

	for _, attributeItems := range ofx2HeaderAttributePattern.FindAllStringSubmatch(headerLine, -1) {
		// full match + name + value
		if len(attributeItems) != 3 {
			log.Warnf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot parse line in ofx 2.x file header, because item is \"%s\"", attributeItems)
			continue
		}

		name, value := attributeItems[1], attributeItems[2]

		switch name {
		case "OFXHEADER":
			fileHeader.OFXDeclarationVersion = oFXDeclarationVersion(value)
		case "VERSION":
			fileHeader.OFXDataVersion = value
		case "SECURITY":
			fileHeader.Security = value
		case "OLDFILEUID":
			fileHeader.OldFileUid = value
		case "NEWFILEUID":
			fileHeader.NewFileUid = value
		default:
			log.Warnf(ctx, "[ofx_data_reader.readOFX2FileHeader] cannot parse unknown header line in ofx 2.x file header, because item is \"%s\"", attributeItems)
		}
	}

	return fileHeader, nil
}