From 9faea14e362777c8922b50aaf890c044b21d61c9 Mon Sep 17 00:00:00 2001 From: MaysWind Date: Wed, 26 Nov 2025 22:54:57 +0800 Subject: [PATCH] support import delimiter-separated values file / data with UTF-16 with BOM encoding (#361) --- pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go | 2 ++ src/consts/file.ts | 2 ++ src/locales/de.json | 2 ++ src/locales/en.json | 2 ++ src/locales/es.json | 2 ++ src/locales/fr.json | 2 ++ src/locales/it.json | 2 ++ src/locales/ja.json | 2 ++ src/locales/ko.json | 2 ++ src/locales/nl.json | 2 ++ src/locales/pt_BR.json | 2 ++ src/locales/ru.json | 2 ++ src/locales/th.json | 2 ++ src/locales/uk.json | 2 ++ src/locales/vi.json | 2 ++ src/locales/zh_Hans.json | 2 ++ src/locales/zh_Hant.json | 2 ++ 17 files changed, 34 insertions(+) diff --git a/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go b/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go index 2c709117..5f8f9494 100644 --- a/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go +++ b/pkg/converters/dsv/custom_transaction_data_dsv_file_importer.go @@ -35,6 +35,8 @@ var supportedFileEncodings = map[string]encoding.Encoding{ "utf-8-bom": unicode.UTF8BOM, // UTF-8 with BOM "utf-16le": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), // UTF-16 Little Endian "utf-16be": unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), // UTF-16 Big Endian + "utf-16le-bom": unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM), // UTF-16 Little Endian with BOM + "utf-16be-bom": unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM), // UTF-16 Big Endian with BOM "cp437": charmap.CodePage437, // OEM United States (CP-437) "cp863": charmap.CodePage863, // OEM Canadian French (CP-863) "cp037": charmap.CodePage037, // IBM EBCDIC US/Canada (CP-037) diff --git a/src/consts/file.ts b/src/consts/file.ts index 29655e01..2b5205e0 100644 --- a/src/consts/file.ts +++ b/src/consts/file.ts @@ -14,6 +14,8 @@ export const SUPPORTED_FILE_ENCODINGS: string[] = 
[ 'utf-8-bom', // UTF-8 with BOM 'utf-16le', // UTF-16 Little Endian 'utf-16be', // UTF-16 Big Endian + 'utf-16le-bom', // UTF-16 Little Endian with BOM + 'utf-16be-bom', // UTF-16 Big Endian with BOM 'cp437', // OEM United States (CP-437) 'cp863', // OEM Canadian French (CP-863) 'cp037', // IBM EBCDIC US/Canada (CP-037) diff --git a/src/locales/de.json b/src/locales/de.json index 14ded857..e8cf6216 100644 --- a/src/locales/de.json +++ b/src/locales/de.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/en.json b/src/locales/en.json index c3f80b3b..947b56ce 100644 --- a/src/locales/en.json +++ b/src/locales/en.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/es.json b/src/locales/es.json index 61d3fd71..8a9e0d2f 100644 --- a/src/locales/es.json +++ b/src/locales/es.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/fr.json b/src/locales/fr.json index b86dc45f..7b2b2a3c 100644 --- a/src/locales/fr.json +++ b/src/locales/fr.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 avec BOM", "utf-16le": "UTF-16 Little Endian", 
"utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian avec BOM", + "utf-16be-bom": "UTF-16 Big Endian avec BOM", "cp437": "OEM États-Unis (CP-437)", "cp863": "OEM Canadien Français (CP-863)", "cp037": "IBM EBCDIC États-Unis/Canada (CP-037)", diff --git a/src/locales/it.json b/src/locales/it.json index 2e7019fc..3235ec77 100644 --- a/src/locales/it.json +++ b/src/locales/it.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/ja.json b/src/locales/ja.json index f4468bb9..dd02c883 100644 --- a/src/locales/ja.json +++ b/src/locales/ja.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM 米国 (CP-437)", "cp863": "OEM カナダ系フランス語 (CP-863)", "cp037": "IBM EBCDIC 米国/カナダ (CP-037)", diff --git a/src/locales/ko.json b/src/locales/ko.json index 1ebc7f00..17663368 100644 --- a/src/locales/ko.json +++ b/src/locales/ko.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 리틀 엔디안", "utf-16be": "UTF-16 빅 엔디안", + "utf-16le-bom": "UTF-16 리틀 엔디안 with BOM", + "utf-16be-bom": "UTF-16 빅 엔디안 with BOM", "cp437": "OEM 미국 (CP-437)", "cp863": "OEM 캐나다 프랑스어 (CP-863)", "cp037": "IBM EBCDIC 미국/캐나다 (CP-037)", diff --git a/src/locales/nl.json b/src/locales/nl.json index 461041a1..90868492 100644 --- a/src/locales/nl.json +++ b/src/locales/nl.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big 
Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/pt_BR.json b/src/locales/pt_BR.json index 6df7ffe2..d0b75468 100644 --- a/src/locales/pt_BR.json +++ b/src/locales/pt_BR.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 com BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian com BOM", + "utf-16be-bom": "UTF-16 Big Endian com BOM", "cp437": "OEM Estados Unidos (CP-437)", "cp863": "OEM Francês Canadense (CP-863)", "cp037": "IBM EBCDIC EUA/Canadá (CP-037)", diff --git a/src/locales/ru.json b/src/locales/ru.json index 130dcbf4..51be9e5c 100644 --- a/src/locales/ru.json +++ b/src/locales/ru.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/th.json b/src/locales/th.json index d755c22a..cdaed0c9 100644 --- a/src/locales/th.json +++ b/src/locales/th.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 พร้อม BOM", "utf-16le": "UTF-16 Endian เล็ก", "utf-16be": "UTF-16 Endian ใหญ่", + "utf-16le-bom": "UTF-16 Endian เล็ก พร้อม BOM", + "utf-16be-bom": "UTF-16 Endian ใหญ่ พร้อม BOM", "cp437": "OEM สหรัฐอเมริกา (CP-437)", "cp863": "OEM ฝรั่งเศสแคนาดา (CP-863)", "cp037": "IBM EBCDIC สหรัฐอเมริกา/แคนาดา (CP-037)", diff --git a/src/locales/uk.json b/src/locales/uk.json index 1d9e120e..298003d5 100644 --- a/src/locales/uk.json +++ b/src/locales/uk.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States 
(CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/vi.json b/src/locales/vi.json index 85c5d332..6ce3caf7 100644 --- a/src/locales/vi.json +++ b/src/locales/vi.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 with BOM", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian with BOM", + "utf-16be-bom": "UTF-16 Big Endian with BOM", "cp437": "OEM United States (CP-437)", "cp863": "OEM Canadian French (CP-863)", "cp037": "IBM EBCDIC US/Canada (CP-037)", diff --git a/src/locales/zh_Hans.json b/src/locales/zh_Hans.json index 1c0cf19d..8bc86954 100644 --- a/src/locales/zh_Hans.json +++ b/src/locales/zh_Hans.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 带签名", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian 带签名", + "utf-16be-bom": "UTF-16 Big Endian 带签名", "cp437": "OEM 美国 (CP-437)", "cp863": "OEM 加拿大法语 (CP-863)", "cp037": "IBM EBCDIC 美国/加拿大 (CP-037)", diff --git a/src/locales/zh_Hant.json b/src/locales/zh_Hant.json index 64051660..eecb560a 100644 --- a/src/locales/zh_Hant.json +++ b/src/locales/zh_Hant.json @@ -1336,6 +1336,8 @@ "utf-8-bom": "UTF-8 帶簽名", "utf-16le": "UTF-16 Little Endian", "utf-16be": "UTF-16 Big Endian", + "utf-16le-bom": "UTF-16 Little Endian 帶簽名", + "utf-16be-bom": "UTF-16 Big Endian 帶簽名", "cp437": "OEM 美國 (CP-437)", "cp863": "OEM 加拿大法語 (CP-863)", "cp037": "IBM EBCDIC 美國/加拿大 (CP-037)",