From d7a0d253c45c1b4292203f058646a83847be6daa Mon Sep 17 00:00:00 2001 From: MaysWind Date: Sun, 1 Mar 2026 16:04:29 +0800 Subject: [PATCH] support utf-32 file encoding --- .../custom/custom_transaction_data_dsv_file_importer.go | 3 +++ src/consts/file.ts | 6 ++++-- src/locales/de.json | 2 ++ src/locales/en.json | 2 ++ src/locales/es.json | 2 ++ src/locales/fr.json | 2 ++ src/locales/it.json | 2 ++ src/locales/ja.json | 2 ++ src/locales/kn.json | 2 ++ src/locales/ko.json | 2 ++ src/locales/nl.json | 2 ++ src/locales/pt_BR.json | 2 ++ src/locales/ru.json | 2 ++ src/locales/sl.json | 2 ++ src/locales/ta.json | 2 ++ src/locales/th.json | 2 ++ src/locales/tr.json | 2 ++ src/locales/uk.json | 2 ++ src/locales/vi.json | 2 ++ src/locales/zh_Hans.json | 2 ++ src/locales/zh_Hant.json | 2 ++ 21 files changed, 45 insertions(+), 2 deletions(-) diff --git a/pkg/converters/custom/custom_transaction_data_dsv_file_importer.go b/pkg/converters/custom/custom_transaction_data_dsv_file_importer.go index c55d24df..f6eb35a3 100644 --- a/pkg/converters/custom/custom_transaction_data_dsv_file_importer.go +++ b/pkg/converters/custom/custom_transaction_data_dsv_file_importer.go @@ -14,6 +14,7 @@ import ( "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/encoding/traditionalchinese" "golang.org/x/text/encoding/unicode" + "golang.org/x/text/encoding/unicode/utf32" "golang.org/x/text/transform" "github.com/mayswind/ezbookkeeping/pkg/converters/converter" @@ -36,6 +37,8 @@ var supportedFileEncodings = map[string]encoding.Encoding{ "utf-8": unicode.UTF8BOM, // UTF-8 "utf-16le": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), // UTF-16 Little Endian "utf-16be": unicode.UTF16(unicode.BigEndian, unicode.UseBOM), // UTF-16 Big Endian + "utf-32le": utf32.UTF32(utf32.LittleEndian, utf32.UseBOM), // UTF-32 Little Endian + "utf-32be": utf32.UTF32(utf32.BigEndian, utf32.UseBOM), // UTF-32 Big Endian "cp437": charmap.CodePage437, // OEM United States (CP-437) "cp863": 
charmap.CodePage863, // OEM Canadian French (CP-863) "cp037": charmap.CodePage037, // IBM EBCDIC US/Canada (CP-037) diff --git a/src/consts/file.ts b/src/consts/file.ts index 7486c066..3016590c 100644 --- a/src/consts/file.ts +++ b/src/consts/file.ts @@ -15,6 +15,8 @@ export const SUPPORTED_FILE_ENCODINGS: string[] = [ UTF_8, // UTF-8 'utf-16le', // UTF-16 Little Endian 'utf-16be', // UTF-16 Big Endian + 'utf-32le', // UTF-32 Little Endian + 'utf-32be', // UTF-32 Big Endian 'cp437', // OEM United States (CP-437) 'cp863', // OEM Canadian French (CP-863) 'cp037', // IBM EBCDIC US/Canada (CP-037) @@ -67,8 +69,8 @@ export const CHARDET_ENCODING_NAME_MAPPING: Record<string, string> = { 'UTF-8': UTF_8, 'UTF-16LE': 'utf-16le', 'UTF-16BE': 'utf-16be', - // 'UTF-32 LE': '', // not supported - // 'UTF-32 BE': '', // not supported + 'UTF-32LE': 'utf-32le', + 'UTF-32BE': 'utf-32be', 'ISO-2022-JP': 'iso-2022-jp', // 'ISO-2022-KR': '', // not supported // 'ISO-2022-CN': '', // not supported diff --git a/src/locales/de.json b/src/locales/de.json index 9722ec29..8106a7d8 100644 --- a/src/locales/de.json +++ b/src/locales/de.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/en.json b/src/locales/en.json index 69dabbe8..4b1c01de 100644 --- a/src/locales/en.json +++ b/src/locales/en.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/es.json b/src/locales/es.json index 4520de23..5cc6b427 100644 --- a/src/locales/es.json +++ 
b/src/locales/es.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/fr.json b/src/locales/fr.json index 90cf10b7..c0a9fed9 100644 --- a/src/locales/fr.json +++ b/src/locales/fr.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM États-Unis (CP-437)", "cp863": "OEM Canadien Français (CP-863)", "cp037": "IBM EBCDIC États-Unis/Canada (CP-037)", diff --git a/src/locales/it.json b/src/locales/it.json index 9e05f6f4..27eac78f 100644 --- a/src/locales/it.json +++ b/src/locales/it.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/ja.json b/src/locales/ja.json index 3a1559d5..aa0ea3a1 100644 --- a/src/locales/ja.json +++ b/src/locales/ja.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM 米国 (CP-437)", "cp863": "OEM カナダ系フランス語 (CP-863)", "cp037": "IBM EBCDIC 米国/カナダ (CP-037)", diff --git a/src/locales/kn.json b/src/locales/kn.json index 6ebe84a3..7cf971bd 100644 --- a/src/locales/kn.json +++ b/src/locales/kn.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 ಲಿಟಲ್ ಎಂಡಿಯನ್", "utf-16be": "UTF-16 ಬಿಗ್ ಎಂಡಿಯನ್", + "utf-32le": "UTF-32 ಲಿಟಲ್ ಎಂಡಿಯನ್", + "utf-32be": "UTF-32 ಬಿಗ್ ಎಂಡಿಯನ್", "cp437": "OEM ಯುನೈಟೆಡ್ ಸ್ಟೇಟ್ಸ್ 
(CP-437)", "cp863": "OEM ಕ್ಯಾನಡಿಯನ್ ಫ್ರೆಂಚ್ (CP-863)", "cp037": "IBM EBCDIC ಯುಎಸ್/ಕ್ಯಾನಡಾ (CP-037)", diff --git a/src/locales/ko.json b/src/locales/ko.json index b1f63bd6..09f46f90 100644 --- a/src/locales/ko.json +++ b/src/locales/ko.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 리틀 엔디안", "utf-16be": "UTF-16 빅 엔디안", + "utf-32le": "UTF-32 리틀 엔디안", + "utf-32be": "UTF-32 빅 엔디안", "cp437": "OEM 미국 (CP-437)", "cp863": "OEM 캐나다 프랑스어 (CP-863)", "cp037": "IBM EBCDIC 미국/캐나다 (CP-037)", diff --git a/src/locales/nl.json b/src/locales/nl.json index 3829f805..59ba56ad 100644 --- a/src/locales/nl.json +++ b/src/locales/nl.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/pt_BR.json b/src/locales/pt_BR.json index 69c579d6..006eb3b8 100644 --- a/src/locales/pt_BR.json +++ b/src/locales/pt_BR.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM Estados Unidos (CP-437)", "cp863": "OEM Francês Canadense (CP-863)", "cp037": "IBM EBCDIC EUA/Canadá (CP-037)", diff --git a/src/locales/ru.json b/src/locales/ru.json index edbe91df..2a04dcf4 100644 --- a/src/locales/ru.json +++ b/src/locales/ru.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/sl.json b/src/locales/sl.json index e08c1d17..8d5b1b59 100644 --- a/src/locales/sl.json +++ b/src/locales/sl.json @@ -1355,6 
+1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM Združene države (CP-437)", "cp863": "OEM kanadska francoščina (CP-863)", "cp037": "IBM EBCDIC ZDA/Kanada (CP-037)", diff --git a/src/locales/ta.json b/src/locales/ta.json index e9f3aaf5..501a82a6 100644 --- a/src/locales/ta.json +++ b/src/locales/ta.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 லிட்டில் எண்டியன்", "utf-16be": "UTF-16 பிக் எண்டியன்", + "utf-32le": "UTF-32 லிட்டில் எண்டியன்", + "utf-32be": "UTF-32 பிக் எண்டியன்", "cp437": "OEM அமெரிக்கா (CP-437)", "cp863": "OEM கனடா பிரஞ்சு (CP-863)", "cp037": "IBM EBCDIC அமெரிக்கா/கனடா (CP-037)", diff --git a/src/locales/th.json b/src/locales/th.json index 079e8d65..e0281c9e 100644 --- a/src/locales/th.json +++ b/src/locales/th.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Endian เล็ก", "utf-16be": "UTF-16 Endian ใหญ่", + "utf-32le": "UTF-32 Endian เล็ก", + "utf-32be": "UTF-32 Endian ใหญ่", "cp437": "OEM สหรัฐอเมริกา (CP-437)", "cp863": "OEM ฝรั่งเศสแคนาดา (CP-863)", "cp037": "IBM EBCDIC สหรัฐอเมริกา/แคนาดา (CP-037)", diff --git a/src/locales/tr.json b/src/locales/tr.json index 38110cca..9f6cab11 100644 --- a/src/locales/tr.json +++ b/src/locales/tr.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM Birleşik Devletler (CP-437)", "cp863": "OEM Kanada Fransızcası (CP-863)", "cp037": "IBM EBCDIC ABD/Kanada (CP-037)", diff --git a/src/locales/uk.json b/src/locales/uk.json index c521836b..f2a1c7b6 100644 --- a/src/locales/uk.json +++ b/src/locales/uk.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United 
States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/vi.json b/src/locales/vi.json index 1e405183..6e5d2c3a 100644 --- a/src/locales/vi.json +++ b/src/locales/vi.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/zh_Hans.json b/src/locales/zh_Hans.json index ea5e0620..e7b8bf99 100644 --- a/src/locales/zh_Hans.json +++ b/src/locales/zh_Hans.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM 美国 (CP-437)", "cp863": "OEM 加拿大法语 (CP-863)", "cp037": "IBM EBCDIC 美国/加拿大 (CP-037)", diff --git a/src/locales/zh_Hant.json b/src/locales/zh_Hant.json index cf748654..c8dadf15 100644 --- a/src/locales/zh_Hant.json +++ b/src/locales/zh_Hant.json @@ -1355,6 +1355,8 @@ "utf-8": "UTF-8", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-32le": "UTF-32 Little Endian", + "utf-32be": "UTF-32 Big Endian", "cp437": "OEM 美國 (CP-437)", "cp863": "OEM 加拿大法語 (CP-863)", "cp037": "IBM EBCDIC 美國/加拿大 (CP-037)",