From 6470ac568f41c3ba3eebb989d2906f71f1088894 Mon Sep 17 00:00:00 2001 From: Deepak Sattiraju Date: Wed, 8 Mar 2017 16:25:28 +0530 Subject: [PATCH] Improving the logic of encoding detection (#3753) --- .../resources.resjson/en-US/resources.resjson | 3 +- .../operations/FileEncoding.ts | 39 +++++++++++-------- Tasks/AzureResourceGroupDeployment/task.json | 3 +- .../task.loc.json | 3 +- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/Tasks/AzureResourceGroupDeployment/Strings/resources.resjson/en-US/resources.resjson b/Tasks/AzureResourceGroupDeployment/Strings/resources.resjson/en-US/resources.resjson index 85af51036eae..6ff6dd231c2b 100644 --- a/Tasks/AzureResourceGroupDeployment/Strings/resources.resjson/en-US/resources.resjson +++ b/Tasks/AzureResourceGroupDeployment/Strings/resources.resjson/en-US/resources.resjson @@ -160,6 +160,5 @@ "loc.messages.TimeoutWhileWaiting": "Timed out while waiting", "loc.messages.InvalidTemplateLocation": "The template location supplied is invalid. Task only supports 'Linked artifact' or 'URL of the file'", "loc.messages.EncodingNotSupported": "Encoding of the file '%s' is '%s' which is not supported. Supported encodings are ['utf-8', 'utf-16le']", - "loc.messages.CouldNotDetectEncoding": "Could not detect encoding of file '%s'", - "loc.messages.ShortFileBufferError": "Short file buffer error on file '%s'" + "loc.messages.CouldNotDetectEncoding": "Could not detect encoding of file '%s'" } \ No newline at end of file diff --git a/Tasks/AzureResourceGroupDeployment/operations/FileEncoding.ts b/Tasks/AzureResourceGroupDeployment/operations/FileEncoding.ts index d625ba5c9433..0b2c822f02db 100644 --- a/Tasks/AzureResourceGroupDeployment/operations/FileEncoding.ts +++ b/Tasks/AzureResourceGroupDeployment/operations/FileEncoding.ts @@ -20,16 +20,16 @@ function detectFileEncodingWithBOM(fileName: string, buffer: Buffer) { type = 'utf-8'; } else if (buffer.slice(0, 4).equals(new Buffer([255, 254, 0, 0]))) { - type = 'UTF-32LE'; + type = 'utf-32le'; } else if (buffer.slice(0, 2).equals(new Buffer([254, 255]))) { - type = 'UTF-16BE'; + type = 'utf-16be'; } else if (buffer.slice(0, 2).equals(new Buffer([255, 254]))) { type = 'utf-16le'; } else if (buffer.slice(0, 4).equals(new Buffer([0, 0, 254, 255]))) { - type = 'UTF-32BE'; + type = 'utf-32be'; } else { tl.debug('Unable to detect File encoding using BOM'); @@ -40,41 +40,48 @@ function detectFileEncodingWithBOM(fileName: string, buffer: Buffer) { function detectFileEncodingWithoutBOM(fileName: string, buffer: Buffer) { tl.debug('Detecting file encoding without BOM'); + if (buffer.length < 4) { + tl.debug('Short file buffer error on file ' + fileName + '. length: ' + buffer.length); + } + var typeCode = 0; var type: string; - for (var index = 0; index < 4; index++) { + var codeForUtf8 = 0 + for (var index = 0; index < 4 && index < buffer.length; index++) { typeCode = typeCode << 1; typeCode = typeCode | (buffer[index] > 0 ? 1 : 0); + codeForUtf8 = codeForUtf8 << 1; + codeForUtf8++; } switch (typeCode) { case 1: - type = 'UTF-32BE'; + type = 'utf-32be'; break; case 5: - type = 'UTF-16BE'; + type = 'utf-16be'; break; case 8: - type = 'UTF-32LE'; + type = 'utf-32le'; break; case 10: type = 'utf-16le'; break; - case 15: - type = 'utf-8'; - break; default: - return null; + if (codeForUtf8 == typeCode) { + type = 'utf-8'; + } + else { + return null; + } } return new FileEncoding(type, false); } export function detectFileEncoding(fileName: string, buffer: Buffer): FileEncoding { - if (buffer.length < 4) { - tl.debug(tl.loc('ShortFileBufferError', fileName)) - throw Error(tl.loc("CouldNotDetectEncoding", fileName)); - } + var fileEncoding: FileEncoding = detectFileEncodingWithBOM(fileName, buffer); - if (fileEncoding == null) + if (fileEncoding == null) { fileEncoding = detectFileEncodingWithoutBOM(fileName, buffer); + } if (fileEncoding == null) { throw new Error(tl.loc("CouldNotDetectEncoding", fileName)); diff --git a/Tasks/AzureResourceGroupDeployment/task.json b/Tasks/AzureResourceGroupDeployment/task.json index 00ebbb2d917d..401ebad7fca3 100644 --- a/Tasks/AzureResourceGroupDeployment/task.json +++ b/Tasks/AzureResourceGroupDeployment/task.json @@ -369,7 +369,6 @@ "TimeoutWhileWaiting": "Timed out while waiting", "InvalidTemplateLocation": "The template location supplied is invalid. Task only supports 'Linked artifact' or 'URL of the file'", "EncodingNotSupported": "Encoding of the file '%s' is '%s' which is not supported. Supported encodings are ['utf-8', 'utf-16le']", - "CouldNotDetectEncoding": "Could not detect encoding of file '%s'", - "ShortFileBufferError": "Short file buffer error on file '%s'" + "CouldNotDetectEncoding": "Could not detect encoding of file '%s'" } } \ No newline at end of file diff --git a/Tasks/AzureResourceGroupDeployment/task.loc.json b/Tasks/AzureResourceGroupDeployment/task.loc.json index c57f25997b4f..d01c91c97233 100644 --- a/Tasks/AzureResourceGroupDeployment/task.loc.json +++ b/Tasks/AzureResourceGroupDeployment/task.loc.json @@ -369,7 +369,6 @@ "TimeoutWhileWaiting": "ms-resource:loc.messages.TimeoutWhileWaiting", "InvalidTemplateLocation": "ms-resource:loc.messages.InvalidTemplateLocation", "EncodingNotSupported": "ms-resource:loc.messages.EncodingNotSupported", - "CouldNotDetectEncoding": "ms-resource:loc.messages.CouldNotDetectEncoding", - "ShortFileBufferError": "ms-resource:loc.messages.ShortFileBufferError" + "CouldNotDetectEncoding": "ms-resource:loc.messages.CouldNotDetectEncoding" } } \ No newline at end of file