support utf-32 file encoding
This commit is contained in:
@@ -14,6 +14,7 @@ import (
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/encoding/unicode/utf32"
|
||||
"golang.org/x/text/transform"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/converters/converter"
|
||||
@@ -36,6 +37,8 @@ var supportedFileEncodings = map[string]encoding.Encoding{
|
||||
"utf-8": unicode.UTF8BOM, // UTF-8
|
||||
"utf-16le": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), // UTF-16 Little Endian
|
||||
"utf-16be": unicode.UTF16(unicode.BigEndian, unicode.UseBOM), // UTF-16 Big Endian
|
||||
"utf-32le": utf32.UTF32(utf32.LittleEndian, utf32.UseBOM), // UTF-32 Little Endian
|
||||
"utf-32be": utf32.UTF32(utf32.BigEndian, utf32.UseBOM), // UTF-32 Big Endian
|
||||
"cp437": charmap.CodePage437, // OEM United States (CP-437)
|
||||
"cp863": charmap.CodePage863, // OEM Canadian French (CP-863)
|
||||
"cp037": charmap.CodePage037, // IBM EBCDIC US/Canada (CP-037)
|
||||
|
||||
+4
-2
@@ -15,6 +15,8 @@ export const SUPPORTED_FILE_ENCODINGS: string[] = [
|
||||
UTF_8, // UTF-8
|
||||
'utf-16le', // UTF-16 Little Endian
|
||||
'utf-16be', // UTF-16 Big Endian
|
||||
'utf-32le', // UTF-32 Little Endian
|
||||
'utf-32be', // UTF-32 Big Endian
|
||||
'cp437', // OEM United States (CP-437)
|
||||
'cp863', // OEM Canadian French (CP-863)
|
||||
'cp037', // IBM EBCDIC US/Canada (CP-037)
|
||||
@@ -67,8 +69,8 @@ export const CHARDET_ENCODING_NAME_MAPPING: Record<string, string> = {
|
||||
'UTF-8': UTF_8,
|
||||
'UTF-16LE': 'utf-16le',
|
||||
'UTF-16BE': 'utf-16be',
|
||||
// 'UTF-32 LE': '', // not supported
|
||||
// 'UTF-32 BE': '', // not supported
|
||||
'UTF-32LE': 'utf-32le',
|
||||
'UTF-32BE': 'utf-32be',
|
||||
'ISO-2022-JP': 'iso-2022-jp',
|
||||
// 'ISO-2022-KR': '', // not supported
|
||||
// 'ISO-2022-CN': '', // not supported
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM États-Unis (CP-437)",
|
||||
"cp863": "OEM Canadien Français (CP-863)",
|
||||
"cp037": "IBM EBCDIC États-Unis/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM 米国 (CP-437)",
|
||||
"cp863": "OEM カナダ系フランス語 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 米国/カナダ (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 ಲಿಟಲ್ ಎಂಡಿಯನ್",
|
||||
"utf-16be": "UTF-16 ಬಿಗ್ ಎಂಡಿಯನ್",
|
||||
"utf-32le": "UTF-32 ಲಿಟಲ್ ಎಂಡಿಯನ್",
|
||||
"utf-32be": "UTF-32 ಬಿಗ್ ಎಂಡಿಯನ್",
|
||||
"cp437": "OEM ಯುನೈಟೆಡ್ ಸ್ಟೇಟ್ಸ್ (CP-437)",
|
||||
"cp863": "OEM ಕ್ಯಾನಡಿಯನ್ ಫ್ರೆಂಚ್ (CP-863)",
|
||||
"cp037": "IBM EBCDIC ಯುಎಸ್/ಕ್ಯಾನಡಾ (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 리틀 엔디안",
|
||||
"utf-16be": "UTF-16 빅 엔디안",
|
||||
"utf-32le": "UTF-32 리틀 엔디안",
|
||||
"utf-32be": "UTF-32 빅 엔디안",
|
||||
"cp437": "OEM 미국 (CP-437)",
|
||||
"cp863": "OEM 캐나다 프랑스어 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 미국/캐나다 (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM Estados Unidos (CP-437)",
|
||||
"cp863": "OEM Francês Canadense (CP-863)",
|
||||
"cp037": "IBM EBCDIC EUA/Canadá (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM Združene države (CP-437)",
|
||||
"cp863": "OEM kanadska francoščina (CP-863)",
|
||||
"cp037": "IBM EBCDIC ZDA/Kanada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 லிட்டில் எண்டியன்",
|
||||
"utf-16be": "UTF-16 பிக் எண்டியன்",
|
||||
"utf-32le": "UTF-32 லிட்டில் எண்டியன்",
|
||||
"utf-32be": "UTF-32 பிக் எண்டியன்",
|
||||
"cp437": "OEM அமெரிக்கா (CP-437)",
|
||||
"cp863": "OEM கனடா பிரஞ்சு (CP-863)",
|
||||
"cp037": "IBM EBCDIC அமெரிக்கா/கனடா (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Endian เล็ก",
|
||||
"utf-16be": "UTF-16 Endian ใหญ่",
|
||||
"utf-32le": "UTF-32 Endian เล็ก",
|
||||
"utf-32be": "UTF-32 Endian ใหญ่",
|
||||
"cp437": "OEM สหรัฐอเมริกา (CP-437)",
|
||||
"cp863": "OEM ฝรั่งเศสแคนาดา (CP-863)",
|
||||
"cp037": "IBM EBCDIC สหรัฐอเมริกา/แคนาดา (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM Birleşik Devletler (CP-437)",
|
||||
"cp863": "OEM Kanada Fransızcası (CP-863)",
|
||||
"cp037": "IBM EBCDIC ABD/Kanada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM 美国 (CP-437)",
|
||||
"cp863": "OEM 加拿大法语 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 美国/加拿大 (CP-037)",
|
||||
|
||||
@@ -1355,6 +1355,8 @@
|
||||
"utf-8": "UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-32le": "UTF-32 Little Endian",
|
||||
"utf-32be": "UTF-32 Big Endian",
|
||||
"cp437": "OEM 美國 (CP-437)",
|
||||
"cp863": "OEM 加拿大法語 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 美國/加拿大 (CP-037)",
|
||||
|
||||
Reference in New Issue
Block a user