Support UTF-32 file encoding

This commit is contained in:
MaysWind
2026-03-01 16:04:29 +08:00
parent 9d275a3051
commit d7a0d253c4
21 changed files with 45 additions and 2 deletions
@@ -14,6 +14,7 @@ import (
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/encoding/unicode/utf32"
"golang.org/x/text/transform"
"github.com/mayswind/ezbookkeeping/pkg/converters/converter"
@@ -36,6 +37,8 @@ var supportedFileEncodings = map[string]encoding.Encoding{
"utf-8": unicode.UTF8BOM, // UTF-8
"utf-16le": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), // UTF-16 Little Endian
"utf-16be": unicode.UTF16(unicode.BigEndian, unicode.UseBOM), // UTF-16 Big Endian
"utf-32le": utf32.UTF32(utf32.LittleEndian, utf32.UseBOM), // UTF-32 Little Endian
"utf-32be": utf32.UTF32(utf32.BigEndian, utf32.UseBOM), // UTF-32 Big Endian
"cp437": charmap.CodePage437, // OEM United States (CP-437)
"cp863": charmap.CodePage863, // OEM Canadian French (CP-863)
"cp037": charmap.CodePage037, // IBM EBCDIC US/Canada (CP-037)
+4 -2
View File
@@ -15,6 +15,8 @@ export const SUPPORTED_FILE_ENCODINGS: string[] = [
UTF_8, // UTF-8
'utf-16le', // UTF-16 Little Endian
'utf-16be', // UTF-16 Big Endian
'utf-32le', // UTF-32 Little Endian
'utf-32be', // UTF-32 Big Endian
'cp437', // OEM United States (CP-437)
'cp863', // OEM Canadian French (CP-863)
'cp037', // IBM EBCDIC US/Canada (CP-037)
@@ -67,8 +69,8 @@ export const CHARDET_ENCODING_NAME_MAPPING: Record<string, string> = {
'UTF-8': UTF_8,
'UTF-16LE': 'utf-16le',
'UTF-16BE': 'utf-16be',
// 'UTF-32 LE': '', // not supported
// 'UTF-32 BE': '', // not supported
'UTF-32LE': 'utf-32le',
'UTF-32BE': 'utf-32be',
'ISO-2022-JP': 'iso-2022-jp',
// 'ISO-2022-KR': '', // not supported
// 'ISO-2022-CN': '', // not supported
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM États-Unis (CP-437)",
"cp863": "OEM Canadien Français (CP-863)",
"cp037": "IBM EBCDIC États-Unis/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM 米国 (CP-437)",
"cp863": "OEM カナダ系フランス語 (CP-863)",
"cp037": "IBM EBCDIC 米国/カナダ (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 ಲಿಟಲ್ ಎಂಡಿಯನ್",
"utf-16be": "UTF-16 ಬಿಗ್ ಎಂಡಿಯನ್",
"utf-32le": "UTF-32 ಲಿಟಲ್ ಎಂಡಿಯನ್",
"utf-32be": "UTF-32 ಬಿಗ್ ಎಂಡಿಯನ್",
"cp437": "OEM ಯುನೈಟೆಡ್ ಸ್ಟೇಟ್ಸ್ (CP-437)",
"cp863": "OEM ಕ್ಯಾನಡಿಯನ್ ಫ್ರೆಂಚ್ (CP-863)",
"cp037": "IBM EBCDIC ಯುಎಸ್/ಕ್ಯಾನಡಾ (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 리틀 엔디안",
"utf-16be": "UTF-16 빅 엔디안",
"utf-32le": "UTF-32 리틀 엔디안",
"utf-32be": "UTF-32 빅 엔디안",
"cp437": "OEM 미국 (CP-437)",
"cp863": "OEM 캐나다 프랑스어 (CP-863)",
"cp037": "IBM EBCDIC 미국/캐나다 (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM Estados Unidos (CP-437)",
"cp863": "OEM Francês Canadense (CP-863)",
"cp037": "IBM EBCDIC EUA/Canadá (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM Združene države (CP-437)",
"cp863": "OEM kanadska francoščina (CP-863)",
"cp037": "IBM EBCDIC ZDA/Kanada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 லிட்டில் எண்டியன்",
"utf-16be": "UTF-16 பிக் எண்டியன்",
"utf-32le": "UTF-32 லிட்டில் எண்டியன்",
"utf-32be": "UTF-32 பிக் எண்டியன்",
"cp437": "OEM அமெரிக்கா (CP-437)",
"cp863": "OEM கனடா பிரஞ்சு (CP-863)",
"cp037": "IBM EBCDIC அமெரிக்கா/கனடா (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Endian เล็ก",
"utf-16be": "UTF-16 Endian ใหญ่",
"utf-32le": "UTF-32 Endian เล็ก",
"utf-32be": "UTF-32 Endian ใหญ่",
"cp437": "OEM สหรัฐอเมริกา (CP-437)",
"cp863": "OEM ฝรั่งเศสแคนาดา (CP-863)",
"cp037": "IBM EBCDIC สหรัฐอเมริกา/แคนาดา (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM Birleşik Devletler (CP-437)",
"cp863": "OEM Kanada Fransızcası (CP-863)",
"cp037": "IBM EBCDIC ABD/Kanada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM United States (CP-437)",
"cp863": "OEM Canadian French (CP-863)",
"cp037": "IBM EBCDIC US/Canada (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM 美国 (CP-437)",
"cp863": "OEM 加拿大法语 (CP-863)",
"cp037": "IBM EBCDIC 美国/加拿大 (CP-037)",
+2
View File
@@ -1355,6 +1355,8 @@
"utf-8": "UTF-8",
"utf-16le": "UTF-16 Little Endian",
"utf-16be": "UTF-16 Big Endian",
"utf-32le": "UTF-32 Little Endian",
"utf-32be": "UTF-32 Big Endian",
"cp437": "OEM 美國 (CP-437)",
"cp863": "OEM 加拿大法語 (CP-863)",
"cp037": "IBM EBCDIC 美國/加拿大 (CP-037)",