Merge the separate "with BOM" and "without BOM" UTF-8 and UTF-16 encoding options into single options; the BOM is now detected and handled automatically
This commit is contained in:
@@ -33,12 +33,9 @@ var supportedFileTypeSeparators = map[string]rune{
|
||||
}
|
||||
|
||||
var supportedFileEncodings = map[string]encoding.Encoding{
|
||||
"utf-8": unicode.UTF8, // UTF-8
|
||||
"utf-8-bom": unicode.UTF8BOM, // UTF-8 with BOM
|
||||
"utf-8": unicode.UTF8BOM, // UTF-8
|
||||
"utf-16le": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), // UTF-16 Little Endian
|
||||
"utf-16be": unicode.UTF16(unicode.BigEndian, unicode.UseBOM), // UTF-16 Big Endian
|
||||
"utf-16le-bom": unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM), // UTF-16 Little Endian with BOM
|
||||
"utf-16be-bom": unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM), // UTF-16 Big Endian with BOM
|
||||
"cp437": charmap.CodePage437, // OEM United States (CP-437)
|
||||
"cp863": charmap.CodePage863, // OEM Canadian French (CP-863)
|
||||
"cp037": charmap.CodePage037, // IBM EBCDIC US/Canada (CP-037)
|
||||
|
||||
@@ -13,11 +13,8 @@ export const UTF_8 = 'utf-8';
|
||||
|
||||
export const SUPPORTED_FILE_ENCODINGS: string[] = [
|
||||
UTF_8, // UTF-8
|
||||
'utf-8-bom', // UTF-8 with BOM
|
||||
'utf-16le', // UTF-16 Little Endian
|
||||
'utf-16be', // UTF-16 Big Endian
|
||||
'utf-16le-bom', // UTF-16 Little Endian with BOM
|
||||
'utf-16be-bom', // UTF-16 Big Endian with BOM
|
||||
'cp437', // OEM United States (CP-437)
|
||||
'cp863', // OEM Canadian French (CP-863)
|
||||
'cp037', // IBM EBCDIC US/Canada (CP-037)
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 con BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian con BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian con BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 avec BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM États-Unis (CP-437)",
|
||||
"cp863": "OEM Canadien Français (CP-863)",
|
||||
"cp037": "IBM EBCDIC États-Unis/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM 米国 (CP-437)",
|
||||
"cp863": "OEM カナダ系フランス語 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 米国/カナダ (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "BOM ಸಹಿತ UTF-8",
|
||||
"utf-16le": "UTF-16 ಲಿಟಲ್ ಎಂಡಿಯನ್",
|
||||
"utf-16be": "UTF-16 ಬಿಗ್ ಎಂಡಿಯನ್",
|
||||
"utf-16le-bom": "BOM ಸಹಿತ UTF-16 ಲಿಟಲ್ ಎಂಡಿಯನ್",
|
||||
"utf-16be-bom": "BOM ಸಹಿತ UTF-16 ಬಿಗ್ ಎಂಡಿಯನ್",
|
||||
"cp437": "OEM ಯುನೈಟೆಡ್ ಸ್ಟೇಟ್ಸ್ (CP-437)",
|
||||
"cp863": "OEM ಕ್ಯಾನಡಿಯನ್ ಫ್ರೆಂಚ್ (CP-863)",
|
||||
"cp037": "IBM EBCDIC ಯುಎಸ್/ಕ್ಯಾನಡಾ (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 리틀 엔디안",
|
||||
"utf-16be": "UTF-16 빅 엔디안",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM 미국 (CP-437)",
|
||||
"cp863": "OEM 캐나다 프랑스어 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 미국/캐나다 (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 com BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM Estados Unidos (CP-437)",
|
||||
"cp863": "OEM Francês Canadense (CP-863)",
|
||||
"cp037": "IBM EBCDIC EUA/Canadá (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 z BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian z BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian z BOM",
|
||||
"cp437": "OEM Združene države (CP-437)",
|
||||
"cp863": "OEM kanadska francoščina (CP-863)",
|
||||
"cp037": "IBM EBCDIC ZDA/Kanada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "BOM உடன் UTF-8",
|
||||
"utf-16le": "UTF-16 லிட்டில் எண்டியன்",
|
||||
"utf-16be": "UTF-16 பிக் எண்டியன்",
|
||||
"utf-16le-bom": "BOM உடன் UTF-16 லிட்டில் எண்டியன்",
|
||||
"utf-16be-bom": "BOM உடன் UTF-16 பிக் எண்டியன்",
|
||||
"cp437": "OEM அமெரிக்கா (CP-437)",
|
||||
"cp863": "OEM கனடா பிரஞ்சு (CP-863)",
|
||||
"cp037": "IBM EBCDIC அமெரிக்கா/கனடா (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 พร้อม BOM",
|
||||
"utf-16le": "UTF-16 Endian เล็ก",
|
||||
"utf-16be": "UTF-16 Endian ใหญ่",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM สหรัฐอเมริกา (CP-437)",
|
||||
"cp863": "OEM ฝรั่งเศสแคนาดา (CP-863)",
|
||||
"cp037": "IBM EBCDIC สหรัฐอเมริกา/แคนาดา (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "BOM ile UTF-8",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "BOM ile UTF-16 Little Endian",
|
||||
"utf-16be-bom": "BOM ile UTF-16 Big Endian",
|
||||
"cp437": "OEM Birleşik Devletler (CP-437)",
|
||||
"cp863": "OEM Kanada Fransızcası (CP-863)",
|
||||
"cp037": "IBM EBCDIC ABD/Kanada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 with BOM",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian with BOM",
|
||||
"utf-16be-bom": "UTF-16 Big Endian with BOM",
|
||||
"cp437": "OEM United States (CP-437)",
|
||||
"cp863": "OEM Canadian French (CP-863)",
|
||||
"cp037": "IBM EBCDIC US/Canada (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 带签名",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian 带签名",
|
||||
"utf-16be-bom": "UTF-16 Big Endian 带签名",
|
||||
"cp437": "OEM 美国 (CP-437)",
|
||||
"cp863": "OEM 加拿大法语 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 美国/加拿大 (CP-037)",
|
||||
|
||||
@@ -1353,11 +1353,8 @@
|
||||
},
|
||||
"encoding": {
|
||||
"utf-8": "UTF-8",
|
||||
"utf-8-bom": "UTF-8 帶簽名",
|
||||
"utf-16le": "UTF-16 Little Endian",
|
||||
"utf-16be": "UTF-16 Big Endian",
|
||||
"utf-16le-bom": "UTF-16 Little Endian 帶簽名",
|
||||
"utf-16be-bom": "UTF-16 Big Endian 帶簽名",
|
||||
"cp437": "OEM 美國 (CP-437)",
|
||||
"cp863": "OEM 加拿大法語 (CP-863)",
|
||||
"cp037": "IBM EBCDIC 美國/加拿大 (CP-037)",
|
||||
|
||||
Reference in New Issue
Block a user