From fef018190729c27faa2e77a15b24c4c1b451e803 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Tue, 18 Oct 2022 01:25:00 +0700 Subject: [PATCH] SWEEP: Lucene.Net.Benchmark.Support.TagSoup: Reviewed API for accessibility issues. Fixed error handling and guard clauses. Changed to generic collections. Renamed method arguments. --- .../Support/TagSoup/Guard.cs | 39 +++ .../Support/TagSoup/HTMLScanner.cs | 38 +-- .../Support/TagSoup/PYXScanner.cs | 50 ++-- .../Support/TagSoup/PYXWriter.cs | 142 ++++++---- .../Support/TagSoup/Parser.cs | 266 +++++++++++------- .../Support/TagSoup/ScanHandler.cs | 30 +- .../Support/TagSoup/Scanner.cs | 10 +- .../Support/TagSoup/Schema.cs | 29 +- .../Support/TagSoup/XMLWriter.cs | 65 +++-- 9 files changed, 427 insertions(+), 242 deletions(-) create mode 100644 src/Lucene.Net.Benchmark/Support/TagSoup/Guard.cs diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Guard.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Guard.cs new file mode 100644 index 0000000000..3f4baaaa81 --- /dev/null +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Guard.cs @@ -0,0 +1,39 @@ +using System; + +namespace TagSoup +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + // LUCENENET specific class to simplify adding guard clause checks to dozens of APIs with the same parameters + internal static class Guard + { + public static void BufferAndRangeCheck(T[] buffer, int startIndex, int length) + { + // Note that this is the order the Apache Harmony tests expect it to be checked in. + if (startIndex < 0) + throw new ArgumentOutOfRangeException(nameof(startIndex), startIndex, $"{nameof(startIndex)} must not be negative."); + if (buffer is null) + throw new ArgumentNullException(nameof(buffer)); + if (startIndex > buffer.Length - length) // Checks for int overflow + throw new ArgumentException($"{nameof(startIndex)} + {nameof(length)} may not be greater than the size of {nameof(buffer)}"); + if (length < 0) + throw new ArgumentOutOfRangeException(nameof(length), length, $"{nameof(length)} must not be negative."); + } + + } +} diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs index 54e3f1db87..4992a4fabd 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs @@ -1,4 +1,4 @@ -// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. // // TagSoup is licensed under the Apache License, // Version 2.0. You may obtain a copy of this license at @@ -13,11 +13,10 @@ // // +using Lucene; using Sax; using System; -using System.Globalization; using System.IO; -using System.Text; namespace TagSoup { @@ -297,7 +296,7 @@ public class HTMLScanner : IScanner, ILocator /// next state = statetable[value + 3]. That is, the value points /// to the start of the answer 4-tuple in the statetable. /// - private static short[][] statetableIndex; + private static readonly short[][] statetableIndex = LoadStateTableIndex(ref statetableIndexMaxChar); // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006) /// /// The highest character value seen in the statetable. @@ -305,8 +304,10 @@ public class HTMLScanner : IScanner, ILocator /// is used. /// private static int statetableIndexMaxChar; - public HTMLScanner() + + private static short[][] LoadStateTableIndex(ref int statetableIndexMaxChar) { + short[][] result; int maxState = -1; int maxChar = -1; for (int i = 0; i < statetable.Length; i += 4) @@ -322,11 +323,11 @@ public HTMLScanner() } statetableIndexMaxChar = maxChar + 1; - statetableIndex = new short[maxState + 1][]; + result = new short[maxState + 1][]; for (int i = 0; i <= maxState; i++) { - statetableIndex[i] = new short[maxChar + 3]; + result[i] = new short[maxChar + 3]; } for (int theState = 0; theState <= maxState; ++theState) { @@ -353,9 +354,10 @@ public HTMLScanner() break; } } - statetableIndex[theState][ch + 2] = (short)hit; + result[theState][ch + 2] = (short)hit; } } + return result; } // Locator implementation @@ -372,14 +374,14 @@ public HTMLScanner() // Scanner implementation /// - /// Reset document locator, supplying systemid and publicid. + /// Reset document locator, supplying systemId and publicId. /// - /// System id - /// Public id - public virtual void ResetDocumentLocator(string publicid, string systemid) + /// System id + /// Public id + public virtual void ResetDocumentLocator(string publicId, string systemId) { - thePublicid = publicid; - theSystemid = systemid; + thePublicid = publicId; + theSystemid = systemId; theLastLine = theLastColumn = theCurrentLine = theCurrentColumn = 0; } @@ -440,9 +442,9 @@ public virtual void Scan(TextReader r, IScanHandler h) switch (action) { case 0: - throw new Exception( - "HTMLScanner can't cope with " + (int)ch + " in state " + - (int)theState); + throw Error.Create( + "HTMLScanner can't cope with " + ch + " in state " + + theState); case A_ADUP: h.Adup(theOutputBuffer, 0, theSize); theSize = 0; @@ -668,7 +670,7 @@ public virtual void Scan(TextReader r, IScanHandler h) theSize = 0; break; default: - throw new Exception("Can't process state " + action); + throw Error.Create("Can't process state " + action); } if (!unread) { diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs index 801324aa07..33dead8415 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs @@ -38,7 +38,7 @@ namespace TagSoup /// public class PYXScanner : IScanner { - public virtual void ResetDocumentLocator(string publicid, string systemid) + public virtual void ResetDocumentLocator(string publicId, string systemId) { // Need this method for interface compatibility, but note // that PyxScanner does not implement Locator. @@ -46,74 +46,80 @@ public virtual void ResetDocumentLocator(string publicid, string systemid) public virtual void Scan(TextReader br, IScanHandler h) { + // LUCENENET: Added guard clauses + if (br is null) + throw new ArgumentNullException(nameof(br)); + if (h is null) + throw new ArgumentNullException(nameof(h)); + string s; - char[] buff = null; + char[] buffer = null; bool instag = false; while ((s = br.ReadLine()) != null) { int size = s.Length; - buff = s.ToCharArray(0, size); - if (buff.Length < size) + buffer = s.ToCharArray(0, size); + if (buffer.Length < size) { - buff = new char[size]; + buffer = new char[size]; } - switch (buff[0]) + switch (buffer[0]) { case '(': if (instag) { - h.STagC(buff, 0, 0); + h.STagC(buffer, 0, 0); //instag = false; // LUCENENET: IDE0059: Remove unnecessary value assignment } - h.GI(buff, 1, size - 1); + h.GI(buffer, 1, size - 1); instag = true; break; case ')': if (instag) { - h.STagC(buff, 0, 0); + h.STagC(buffer, 0, 0); instag = false; } - h.ETag(buff, 1, size - 1); + h.ETag(buffer, 1, size - 1); break; case '?': if (instag) { - h.STagC(buff, 0, 0); + h.STagC(buffer, 0, 0); instag = false; } - h.PI(buff, 1, size - 1); + h.PI(buffer, 1, size - 1); break; case 'A': int sp = s.IndexOf(' '); - h.Aname(buff, 1, sp - 1); - h.Aval(buff, sp + 1, size - sp - 1); + h.Aname(buffer, 1, sp - 1); + h.Aval(buffer, sp + 1, size - sp - 1); break; case '-': if (instag) { - h.STagC(buff, 0, 0); + h.STagC(buffer, 0, 0); instag = false; } if (s.Equals("-\\n", StringComparison.Ordinal)) { - buff[0] = '\n'; - h.PCDATA(buff, 0, 1); + buffer[0] = '\n'; + h.PCDATA(buffer, 0, 1); } else { // FIXME: // Does not decode \t and \\ in input - h.PCDATA(buff, 1, size - 1); + h.PCDATA(buffer, 1, size - 1); } break; case 'E': if (instag) { - h.STagC(buff, 0, 0); + h.STagC(buffer, 0, 0); instag = false; } - h.Entity(buff, 1, size - 1); + h.Entity(buffer, 1, size - 1); break; default: // System.err.print("Gotcha "); @@ -122,10 +128,10 @@ public virtual void Scan(TextReader br, IScanHandler h) break; } } - h.EOF(buff, 0, 0); + h.EOF(buffer, 0, 0); } - public void StartCDATA() + public virtual void StartCDATA() { } diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs index b3e349e6ef..c06eefd069 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs @@ -18,6 +18,7 @@ using Sax; using Sax.Ext; +using System; using System.IO; namespace TagSoup @@ -34,82 +35,100 @@ public class PYXWriter : IScanHandler, IContentHandler, ILexicalHandler // ScanHandler implementation - public void Adup(char[] buff, int offset, int length) + public virtual void Adup(char[] buffer, int startIndex, int length) { theWriter.WriteLine(attrName); attrName = null; } - public void Aname(char[] buff, int offset, int length) + public virtual void Aname(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + theWriter.Write('A'); - theWriter.Write(buff, offset, length); + theWriter.Write(buffer, startIndex, length); theWriter.Write(' '); - attrName = new string(buff, offset, length); + attrName = new string(buffer, startIndex, length); } - public void Aval(char[] buff, int offset, int length) + public virtual void Aval(char[] buffer, int startIndex, int length) { - theWriter.Write(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + theWriter.Write(buffer, startIndex, length); theWriter.WriteLine(); attrName = null; } - public void Cmnt(char[] buff, int offset, int length) + public virtual void Cmnt(char[] buffer, int startIndex, int length) { // theWriter.Write('!'); - // theWriter.Write(buff, offset, length); + // theWriter.Write(buffer, startIndex, length); // theWriter.WriteLine(); } - public void Entity(char[] buff, int offset, int length) + public virtual void Entity(char[] buffer, int startIndex, int length) { } - public int GetEntity() + public virtual int GetEntity() { return 0; } - public void EOF(char[] buff, int offset, int length) + public virtual void EOF(char[] buffer, int startIndex, int length) { theWriter.Dispose(); } - public void ETag(char[] buff, int offset, int length) + public virtual void ETag(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + theWriter.Write(')'); - theWriter.Write(buff, offset, length); + theWriter.Write(buffer, startIndex, length); theWriter.WriteLine(); } - public void Decl(char[] buff, int offset, int length) + public virtual void Decl(char[] buffer, int startIndex, int length) { } - public void GI(char[] buff, int offset, int length) + public virtual void GI(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + theWriter.Write('('); - theWriter.Write(buff, offset, length); + theWriter.Write(buffer, startIndex, length); theWriter.WriteLine(); } - public void CDSect(char[] buff, int offset, int length) + public virtual void CDSect(char[] buffer, int startIndex, int length) { - PCDATA(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + PCDATA(buffer, startIndex, length); } - public void PCDATA(char[] buff, int offset, int length) + public virtual void PCDATA(char[] buffer, int startIndex, int length) { if (length == 0) { return; // nothing to do } + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + bool inProgress = false; - length += offset; - for (int i = offset; i < length; i++) + length += startIndex; + for (int i = startIndex; i < length; i++) { - if (buff[i] == '\n') + if (buffer[i] == '\n') { if (inProgress) { @@ -124,7 +143,7 @@ public void PCDATA(char[] buff, int offset, int length) { theWriter.Write('-'); } - switch (buff[i]) + switch (buffer[i]) { case '\t': theWriter.Write("\\t"); @@ -133,7 +152,7 @@ public void PCDATA(char[] buff, int offset, int length) theWriter.Write("\\\\"); break; default: - theWriter.Write(buff[i]); + theWriter.Write(buffer[i]); break; } inProgress = true; @@ -145,42 +164,51 @@ public void PCDATA(char[] buff, int offset, int length) } } - public void PITarget(char[] buff, int offset, int length) + public virtual void PITarget(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + theWriter.Write('?'); - theWriter.Write(buff, offset, length); + theWriter.Write(buffer, startIndex, length); theWriter.Write(' '); } - public void PI(char[] buff, int offset, int length) + public virtual void PI(char[] buffer, int startIndex, int length) { - theWriter.Write(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + theWriter.Write(buffer, startIndex, length); theWriter.WriteLine(); } - public void STagC(char[] buff, int offset, int length) + public virtual void STagC(char[] buffer, int startIndex, int length) { // theWriter.WriteLine("!"); // FIXME } - public void STagE(char[] buff, int offset, int length) + public virtual void STagE(char[] buffer, int startIndex, int length) { theWriter.WriteLine("!"); // FIXME } // SAX ContentHandler implementation - public void Characters(char[] buff, int offset, int length) + public virtual void Characters(char[] buffer, int startIndex, int length) { - PCDATA(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + PCDATA(buffer, startIndex, length); } - public void EndDocument() + public virtual void EndDocument() { theWriter.Dispose(); } - public void EndElement(string uri, string localname, string qname) + public virtual void EndElement(string uri, string localname, string qname) { if (qname.Length == 0) { @@ -190,16 +218,19 @@ public void EndElement(string uri, string localname, string qname) theWriter.WriteLine(qname); } - public void EndPrefixMapping(string prefix) + public virtual void EndPrefixMapping(string prefix) { } - public void IgnorableWhitespace(char[] buff, int offset, int length) + public virtual void IgnorableWhitespace(char[] buffer, int startIndex, int length) { - Characters(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + Characters(buffer, startIndex, length); } - public void ProcessingInstruction(string target, string data) + public virtual void ProcessingInstruction(string target, string data) { theWriter.Write('?'); theWriter.Write(target); @@ -207,24 +238,31 @@ public void ProcessingInstruction(string target, string data) theWriter.WriteLine(data); } - public void SetDocumentLocator(ILocator locator) + public virtual void SetDocumentLocator(ILocator locator) { } - public void SkippedEntity(string name) + public virtual void SkippedEntity(string name) { } - public void StartDocument() + public virtual void StartDocument() { } - public void StartElement(string uri, string localname, string qname, IAttributes atts) + public virtual void StartElement(string uri, string localname, string qname, IAttributes atts) { + // LUCENENET: Added guard clauses + if (qname is null) + throw new ArgumentNullException(nameof(qname)); + if (atts is null) + throw new ArgumentNullException(nameof(atts)); + if (qname.Length == 0) { qname = localname; } + theWriter.Write('('); theWriter.WriteLine(qname); int length = atts.Length; @@ -243,44 +281,44 @@ public void StartElement(string uri, string localname, string qname, IAttributes } } - public void StartPrefixMapping(string prefix, string uri) + public virtual void StartPrefixMapping(string prefix, string uri) { } - public void Comment(char[] ch, int start, int length) + public virtual void Comment(char[] ch, int start, int length) { Cmnt(ch, start, length); } - public void EndCDATA() + public virtual void EndCDATA() { } - public void EndDTD() + public virtual void EndDTD() { } - public void EndEntity(string name) + public virtual void EndEntity(string name) { } - public void StartCDATA() + public virtual void StartCDATA() { } - public void StartDTD(string name, string publicId, string systemId) + public virtual void StartDTD(string name, string publicId, string systemId) { } - public void StartEntity(string name) + public virtual void StartEntity(string name) { } // Constructor - public PYXWriter(TextWriter w) + public PYXWriter(TextWriter writer) { - theWriter = w; + theWriter = writer ?? throw new ArgumentNullException(nameof(writer)); } } } diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs index 2e31c0f07f..dcf4737ce6 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs @@ -21,7 +21,7 @@ using Sax.Ext; using Sax.Helpers; using System; -using System.Collections; +using System.Collections.Generic; using System.IO; using System.Text; using JCG = J2N.Collections.Generic; @@ -256,7 +256,7 @@ public class Parser : DefaultHandler, IScanHandler, IXMLReader, ILexicalHandler // the corresponding instance variables, but care must be taken // to keep them in sync. - private readonly Hashtable features = new Hashtable { + private readonly IDictionary features = new Dictionary { { NAMESPACES_FEATURE, DEFAULT_NAMESPACES }, { NAMESPACE_PREFIXES_FEATURE, false }, { EXTERNAL_GENERAL_ENTITIES_FEATURE, false }, @@ -283,9 +283,9 @@ public class Parser : DefaultHandler, IScanHandler, IXMLReader, ILexicalHandler public virtual bool GetFeature(string name) { - if (features.ContainsKey(name)) + if (features.TryGetValue(name, out bool value)) { - return (bool)features[name]; + return value; } throw new SAXNotRecognizedException("Unknown feature " + name); } @@ -338,6 +338,10 @@ public virtual void SetFeature(string name, bool value) public virtual object GetProperty(string name) { + // LUCENENET: Added guard clause + if (name is null) + throw new ArgumentNullException(nameof(name)); + if (name.Equals(LEXICAL_HANDLER_PROPERTY, StringComparison.Ordinal)) { return theLexicalHandler == this ? null : theLexicalHandler; @@ -359,6 +363,12 @@ public virtual object GetProperty(string name) public virtual void SetProperty(string name, object value) { + // LUCENENET: Added guard clauses + if (name is null) + throw new ArgumentNullException(nameof(name)); + if (value is null) + throw new ArgumentNullException(nameof(value)); + if (name.Equals(LEXICAL_HANDLER_PROPERTY, StringComparison.Ordinal)) { if (value is null) @@ -442,6 +452,10 @@ public virtual IErrorHandler ErrorHandler public virtual void Parse(InputSource input) { + // LUCENENET: Added guard clause + if (input is null) + throw new ArgumentNullException(nameof(input)); + Setup(); TextReader r = GetReader(input); theContentHandler.StartDocument(); @@ -457,9 +471,13 @@ public virtual void Parse(InputSource input) theScanner.Scan(r, this); } - public virtual void Parse(string systemid) + public virtual void Parse(string systemId) { - Parse(new InputSource(systemid)); + // LUCENENET: Added guard clause + if (systemId is null) + throw new ArgumentNullException(nameof(systemId)); + + Parse(new InputSource(systemId)); } // Sets up instance variables that haven't been set by setFeature @@ -499,13 +517,13 @@ private TextReader GetReader(InputSource s) TextReader r = s.TextReader; Stream i = s.Stream; Encoding encoding = s.Encoding; - string publicid = s.PublicId; - string systemid = s.SystemId; + string publicId = s.PublicId; + string systemId = s.SystemId; if (r is null) { if (i is null) { - i = GetInputStream(publicid, systemid); + i = GetInputStream(publicId, systemId); } if (!(i is BufferedStream)) { @@ -531,18 +549,22 @@ private TextReader GetReader(InputSource s) } /// - /// Get an Stream based on a publicid and a systemid + /// Get an Stream based on a publicId and a systemId /// We don't process publicids (who uses them anyhow?) /// - /// - /// + /// + /// /// #pragma warning disable IDE0060 // Remove unused parameter - private static Stream GetInputStream(string publicid, string systemid) // LUCENENET: CA1822: Mark members as static + private static Stream GetInputStream(string publicId, string systemId) // LUCENENET: CA1822: Mark members as static #pragma warning restore IDE0060 // Remove unused parameter { + // LUCENENET: Added guard clause + if (systemId is null) + throw new ArgumentNullException(nameof(systemId)); + var basis = new Uri("file://" + Directory.GetCurrentDirectory() + Path.DirectorySeparatorChar); - var url = new Uri(basis, systemid); + var url = new Uri(basis, systemId); return new FileStream(url.LocalPath, FileMode.Open, FileAccess.Read, FileShare.Read); } @@ -561,7 +583,7 @@ private static Stream GetInputStream(string publicid, string systemid) // LUCENE private int theEntity; // needs to support chars past U+FFFF - public virtual void Adup(char[] buff, int offset, int length) + public virtual void Adup(char[] buffer, int startIndex, int length) { if (theNewElement is null || theAttributeName is null) { @@ -571,25 +593,31 @@ public virtual void Adup(char[] buff, int offset, int length) theAttributeName = null; } - public virtual void Aname(char[] buff, int offset, int length) + public virtual void Aname(char[] buffer, int startIndex, int length) { if (theNewElement is null) { return; } + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + // Currently we don't rely on Schema to canonicalize // attribute names. - theAttributeName = MakeName(buff, offset, length).ToLowerInvariant(); + theAttributeName = MakeName(buffer, startIndex, length).ToLowerInvariant(); // System.err.println("%% Attribute name " + theAttributeName); } - public virtual void Aval(char[] buff, int offset, int length) + public virtual void Aval(char[] buffer, int startIndex, int length) { if (theNewElement is null || theAttributeName is null) { return; } - var value = new string(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + var value = new string(buffer, startIndex, length); // System.err.println("%% Attribute value [" + value + "]"); value = ExpandEntities(value); theNewElement.SetAttribute(theAttributeName, null, value); @@ -661,54 +689,54 @@ private string ExpandEntities(string src) return new string(dst, 0, dstlen); } - public virtual void Entity(char[] buff, int offset, int length) + public virtual void Entity(char[] buffer, int startIndex, int length) { - theEntity = LookupEntity(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + theEntity = LookupEntity(buffer, startIndex, length); } /// /// Process numeric character references, /// deferring to the schema for named ones. /// - /// - /// + /// + /// /// /// - private int LookupEntity(char[] buff, int offset, int length) + private int LookupEntity(char[] buffer, int startIndex, int length) { int result = 0; if (length < 1) { return result; } - // System.err.println("%% Entity at " + offset + " " + length); - // System.err.println("%% Got entity [" + new string(buff, offset, length) + "]"); - if (buff[offset] == '#') + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + // System.err.println("%% Entity at " + startIndex + " " + length); + // System.err.println("%% Got entity [" + new string(buffer, startIndex, length) + "]"); + if (buffer[startIndex] == '#') { - if (length > 1 && (buff[offset + 1] == 'x' || buff[offset + 1] == 'X')) - { - try - { - return Convert.ToInt32(new string(buff, offset + 2, length - 2), 16); - } - catch (FormatException) - { - return 0; - } - } - try - { - return Convert.ToInt32(new string(buff, offset + 1, length - 1), 10); - } - catch (FormatException) + if (length > 1 && (buffer[startIndex + 1] == 'x' || buffer[startIndex + 1] == 'X')) { + // LUCENENET: don't allow int parsing to throw exceptions + if (J2N.Numerics.Int32.TryParse(buffer, startIndex + 2, length - 2, 16, out result)) + return result; + return 0; } + // LUCENENET: don't allow int parsing to throw exceptions + if (J2N.Numerics.Int32.TryParse(buffer, startIndex + 1, length - 1, 10, out result)) + return result; + + return 0; } - return theSchema.GetEntity(new string(buff, offset, length)); + return theSchema.GetEntity(new string(buffer, startIndex, length)); } - public virtual void EOF(char[] buff, int offset, int length) + public virtual void EOF(char[] buffer, int startIndex, int length) { if (virginStack) { @@ -725,18 +753,24 @@ public virtual void EOF(char[] buff, int offset, int length) theContentHandler.EndDocument(); } - public virtual void ETag(char[] buff, int offset, int length) + public virtual void ETag(char[] buffer, int startIndex, int length) { - if (ETagCdata(buff, offset, length)) + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + if (ETagCdata(buffer, startIndex, length)) { return; } - ETagBasic(buff, offset, length); + ETagBasic(buffer, startIndex, length); } private static readonly char[] etagchars = { '<', '/', '>' }; - public virtual bool ETagCdata(char[] buff, int offset, int length) + public virtual bool ETagCdata(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + string currentName = theStack.Name; // If this is a CDATA element and the tag doesn't match, // or isn't properly formed (junk after the name), @@ -748,7 +782,7 @@ public virtual bool ETagCdata(char[] buff, int offset, int length) { for (int i = 0; i < length; i++) { - if (char.ToLowerInvariant(buff[offset + i]) != char.ToLowerInvariant(currentName[i])) + if (char.ToLowerInvariant(buffer[startIndex + i]) != char.ToLowerInvariant(currentName[i])) { realTag = false; break; @@ -758,7 +792,7 @@ public virtual bool ETagCdata(char[] buff, int offset, int length) if (!realTag) { theContentHandler.Characters(etagchars, 0, 2); - theContentHandler.Characters(buff, offset, length); + theContentHandler.Characters(buffer, startIndex, length); theContentHandler.Characters(etagchars, 2, 1); theScanner.StartCDATA(); return true; @@ -767,14 +801,17 @@ public virtual bool ETagCdata(char[] buff, int offset, int length) return false; } - public virtual void ETagBasic(char[] buff, int offset, int length) + public virtual void ETagBasic(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + theNewElement = null; string name; if (length != 0) { // Canonicalize case of name - name = MakeName(buff, offset, length); + name = MakeName(buffer, startIndex, length); // System.err.println("got etag [" + name + "]"); ElementType type = theSchema.GetElementType(name); if (type is null) @@ -994,15 +1031,18 @@ private bool Foreign(string prefix, string ns) /// markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment /// ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral /// - /// - /// + /// + /// /// - public virtual void Decl(char[] buff, int offset, int length) + public virtual void Decl(char[] buffer, int startIndex, int length) { - var s = new string(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + var s = new string(buffer, startIndex, length); string name = null; - string systemid = null; - string publicid = null; + string systemId = null; + string publicId = null; string[] v = Split(s); if (v.Length > 0 && "DOCTYPE".Equals(v[0], StringComparison.OrdinalIgnoreCase)) { @@ -1016,40 +1056,40 @@ public virtual void Decl(char[] buff, int offset, int length) name = v[1]; if (v.Length > 3 && "SYSTEM".Equals(v[2], StringComparison.Ordinal)) { - systemid = v[3]; + systemId = v[3]; } else if (v.Length > 3 && "PUBLIC".Equals(v[2], StringComparison.Ordinal)) { - publicid = v[3]; + publicId = v[3]; if (v.Length > 4) { - systemid = v[4]; + systemId = v[4]; } else { - systemid = ""; + systemId = ""; } } } } - publicid = TrimQuotes(publicid); - systemid = TrimQuotes(systemid); + publicId = TrimQuotes(publicId); + systemId = TrimQuotes(systemId); if (name != null) { - publicid = CleanPublicId(publicid); - theLexicalHandler.StartDTD(name, publicid, systemid); + publicId = CleanPublicId(publicId); + theLexicalHandler.StartDTD(name, publicId, systemId); theLexicalHandler.EndDTD(); theDoctypeName = name; - theDoctypePublicId = publicid; + theDoctypePublicId = publicId; if (theScanner is ILocator locator) { - // Must resolve systemid + // Must resolve systemId theDoctypeSystemId = locator.SystemId; try { if (Uri.IsWellFormedUriString(theDoctypeSystemId, UriKind.Absolute)) { - theDoctypeSystemId = new Uri(new Uri(theDoctypeSystemId), systemid).ToString(); + theDoctypeSystemId = new Uri(new Uri(theDoctypeSystemId), systemId).ToString(); } } catch (Exception) @@ -1179,13 +1219,16 @@ private static string CleanPublicId(string src) // LUCENENET: CA1822: Mark membe return dst.ToString().Trim(); // trim any final junk whitespace } - public virtual void GI(char[] buff, int offset, int length) + public virtual void GI(char[] buffer, int startIndex, int length) { if (theNewElement != null) { return; } - string name = MakeName(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + string name = MakeName(buffer, startIndex, length); if (name is null) { return; @@ -1212,51 +1255,62 @@ public virtual void GI(char[] buff, int offset, int length) // System.err.println("%% Got GI " + theNewElement.name()); } - public virtual void CDSect(char[] buff, int offset, int length) + public virtual void CDSect(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + theLexicalHandler.StartCDATA(); - PCDATA(buff, offset, length); + PCDATA(buffer, startIndex, length); theLexicalHandler.EndCDATA(); } - public virtual void PCDATA(char[] buff, int offset, int length) + public virtual void PCDATA(char[] buffer, int startIndex, int length) { if (length == 0) { return; } + + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + bool allWhite = true; for (int i = 0; i < length; i++) { - if (!char.IsWhiteSpace(buff[offset + i])) + if (!char.IsWhiteSpace(buffer[startIndex + i])) { allWhite = false; + break; // LUCENENET: No need to check the rest } } if (allWhite && !theStack.CanContain(thePCDATA)) { if (ignorableWhitespace) { - theContentHandler.IgnorableWhitespace(buff, offset, length); + theContentHandler.IgnorableWhitespace(buffer, startIndex, length); } } else { Rectify(thePCDATA); - theContentHandler.Characters(buff, offset, length); + theContentHandler.Characters(buffer, startIndex, length); } } - public virtual void PITarget(char[] buff, int offset, int length) + public virtual void PITarget(char[] buffer, int startIndex, int length) { if (theNewElement != null) { return; } - thePITarget = MakeName(buff, offset, length).Replace(':', '_'); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + thePITarget = MakeName(buffer, startIndex, length).Replace(':', '_'); } - public virtual void PI(char[] buff, int offset, int length) + public virtual void PI(char[] buffer, int startIndex, int length) { if (theNewElement != null || thePITarget is null) { @@ -1266,46 +1320,61 @@ public virtual void PI(char[] buff, int offset, int length) { return; } - // if (length > 0 && buff[length - 1] == '?') System.err.println("%% Removing ? from PI"); - if (length > 0 && buff[length - 1] == '?') + + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + // if (length > 0 && buffer[length - 1] == '?') System.err.println("%% Removing ? from PI"); + if (length > 0 && buffer[length - 1] == '?') { length--; // remove trailing ? } - theContentHandler.ProcessingInstruction(thePITarget, new string(buff, offset, length)); + theContentHandler.ProcessingInstruction(thePITarget, new string(buffer, startIndex, length)); thePITarget = null; } - public virtual void STagC(char[] buff, int offset, int length) + public virtual void STagC(char[] buffer, int startIndex, int length) { // System.err.println("%% Start-tag"); if (theNewElement is null) { return; } + + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + Rectify(theNewElement); if (theStack.Model == Schema.M_EMPTY) { // Force an immediate end tag - ETagBasic(buff, offset, length); + ETagBasic(buffer, startIndex, length); } } - public virtual void STagE(char[] buff, int offset, int length) + public virtual void STagE(char[] buffer, int startIndex, int length) { // System.err.println("%% Empty-tag"); if (theNewElement is null) { return; } + + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + Rectify(theNewElement); // Force an immediate end tag - ETagBasic(buff, offset, length); + ETagBasic(buffer, startIndex, length); } //private char[] theCommentBuffer = new char[2000]; // LUCENENET: Never read - public virtual void Cmnt(char[] buff, int offset, int length) + public virtual void Cmnt(char[] buffer, int startIndex, int length) { - theLexicalHandler.Comment(buff, offset, length); + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + + theLexicalHandler.Comment(buffer, startIndex, length); } /// @@ -1374,19 +1443,22 @@ public virtual int GetEntity() /// This no longer lowercases the result: we depend on Schema to /// canonicalize case. /// - /// - /// + /// + /// /// /// - private string MakeName(char[] buff, int offset, int length) + private string MakeName(char[] buffer, int startIndex, int length) { + // LUCENENET: Added guard clauses + Guard.BufferAndRangeCheck(buffer, startIndex, length); + var dst = new StringBuilder(length + 2); bool seenColon = false; bool start = true; - // string src = new string(buff, offset, length); // DEBUG - for (; length-- > 0; offset++) + // string src = new string(buffer, startIndex, length); // DEBUG + for (; length-- > 0; startIndex++) { - char ch = buff[offset]; + char ch = buffer[startIndex]; if (char.IsLetter(ch) || ch == '_') { start = false; @@ -1458,7 +1530,7 @@ public virtual void StartCDATA() { } - public virtual void StartDTD(string name, string publicid, string systemid) + public virtual void StartDTD(string name, string publicId, string systemId) { } diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs index 3901ada345..cbd9922ba4 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs @@ -24,77 +24,77 @@ public interface IScanHandler /// /// Reports an attribute name without a value. /// - void Adup(char[] buff, int offset, int length); + void Adup(char[] buffer, int startIndex, int length); /// /// Reports an attribute name; a value will follow. /// - void Aname(char[] buff, int offset, int length); + void Aname(char[] buffer, int startIndex, int length); /// /// Reports an attribute value. /// - void Aval(char[] buff, int offset, int length); + void Aval(char[] buffer, int startIndex, int length); /// /// Reports the content of a CDATA section (not a CDATA element) /// - void CDSect(char[] buff, int offset, int length); + void CDSect(char[] buffer, int startIndex, int length); /// /// Reports a <!....> declaration - typically a DOCTYPE /// - void Decl(char[] buff, int offset, int length); + void Decl(char[] buffer, int startIndex, int length); /// /// Reports an entity reference or character reference. /// - void Entity(char[] buff, int offset, int length); + void Entity(char[] buffer, int startIndex, int length); /// /// Reports EOF. /// - void EOF(char[] buff, int offset, int length); + void EOF(char[] buffer, int startIndex, int length); /// /// Reports an end-tag. /// - void ETag(char[] buff, int offset, int length); + void ETag(char[] buffer, int startIndex, int length); /// /// Reports the general identifier (element type name) of a start-tag. /// - void GI(char[] buff, int offset, int length); + void GI(char[] buffer, int startIndex, int length); /// /// Reports character content. /// - void PCDATA(char[] buff, int offset, int length); + void PCDATA(char[] buffer, int startIndex, int length); /// /// Reports the data part of a processing instruction. /// - void PI(char[] buff, int offset, int length); + void PI(char[] buffer, int startIndex, int length); /// /// Reports the target part of a processing instruction. /// - void PITarget(char[] buff, int offset, int length); + void PITarget(char[] buffer, int startIndex, int length); /// /// Reports the close of a start-tag. /// - void STagC(char[] buff, int offset, int length); + void STagC(char[] buffer, int startIndex, int length); /// /// Reports the close of an empty-tag. /// - void STagE(char[] buff, int offset, int length); + void STagE(char[] buffer, int startIndex, int length); /// /// Reports a comment. /// - void Cmnt(char[] buff, int offset, int length); + void Cmnt(char[] buffer, int startIndex, int length); /// /// Returns the value of the last entity or character reference reported. diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs index 5e4d406d91..5e0fb47664 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs @@ -37,13 +37,13 @@ public interface IScanner /// /// Reset the embedded locator. /// - /// - /// The publicid of the source + /// + /// The publicId of the source /// - /// - /// The systemid of the source + /// + /// The systemId of the source /// - void ResetDocumentLocator(string publicid, string systemid); + void ResetDocumentLocator(string publicId, string systemId); /// /// Signal to the scanner to start CDATA content mode. diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs index ba682faaf2..91b2f3092b 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs @@ -14,8 +14,9 @@ // // Model of document +using Lucene; using System; -using System.Collections; +using System.Collections.Generic; namespace TagSoup { @@ -34,8 +35,8 @@ public abstract class Schema public const int F_CDATA = 2; public const int F_NOFORCE = 4; - private readonly Hashtable theEntities = new Hashtable(); // string -> Character - private readonly Hashtable theElementTypes = new Hashtable(); // string -> ElementType + private readonly IDictionary theEntities = new Dictionary(); // string -> Character + private readonly IDictionary theElementTypes = new Dictionary(); // string -> ElementType private string theURI = ""; private string thePrefix = ""; @@ -75,7 +76,7 @@ public virtual void Attribute(string elemName, string attrName, string type, str ElementType e = GetElementType(elemName); if (e is null) { - throw new Exception("Attribute " + attrName + " specified for unknown element type " + elemName); + throw Error.Create("Attribute " + attrName + " specified for unknown element type " + elemName); } e.SetAttribute(attrName, type, value); } @@ -91,12 +92,12 @@ public virtual void Parent(string name, string parentName) ElementType parent = GetElementType(parentName); if (child is null) { - throw new Exception("No child " + name + " for parent " + parentName); + throw Error.Create("No child " + name + " for parent " + parentName); } if (parent is null) { #pragma warning disable IDE0016 // Use 'throw' expression - throw new Exception("No parent " + parentName + " for child " + name); + throw Error.Create("No parent " + parentName + " for child " + name); #pragma warning restore IDE0016 // Use 'throw' expression } child.Parent = parent; @@ -119,7 +120,11 @@ public virtual void Entity(string name, int value) /// The corresponding public virtual ElementType GetElementType(string name) { - return (ElementType)(theElementTypes[name.ToLowerInvariant()]); + // LUCENENET: Added guard clause + if (name is null) + throw new ArgumentNullException(nameof(name)); + + return theElementTypes.TryGetValue(name.ToLowerInvariant(), out ElementType value) ? value : null; } /// @@ -129,11 +134,15 @@ public virtual ElementType GetElementType(string name) /// The corresponding character, or 0 if none public virtual int GetEntity(string name) { + // LUCENENET: Added guard clause + if (name is null) + throw new ArgumentNullException(nameof(name)); + // System.err.println("%% Looking up entity " + name); - if (theEntities.ContainsKey(name)) + if (theEntities.TryGetValue(name, out int value)) { - return (int)theEntities[name]; - } + return value; + } return 0; } diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/XMLWriter.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/XMLWriter.cs index 5331082186..35b30f5877 100644 --- a/src/Lucene.Net.Benchmark/Support/TagSoup/XMLWriter.cs +++ b/src/Lucene.Net.Benchmark/Support/TagSoup/XMLWriter.cs @@ -22,6 +22,7 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using Console = Lucene.Net.Util.SystemConsole; namespace TagSoup { @@ -298,7 +299,7 @@ public virtual void StartCDATA() { } - public virtual void StartDTD(string name, string publicid, string systemid) + public virtual void StartDTD(string name, string publicId, string systemId) { if (name is null) { @@ -311,25 +312,25 @@ public virtual void StartDTD(string name, string publicid, string systemid) hasOutputDTD = true; Write("\n"); } @@ -359,9 +360,9 @@ private void Init(TextWriter writer) { SetOutput(writer); nsSupport = new NamespaceSupport(); - prefixTable = new Hashtable(); - forcedDeclTable = new Hashtable(); - doneDeclTable = new Hashtable(); + prefixTable = new Dictionary(); + forcedDeclTable = new Dictionary(); + doneDeclTable = new Dictionary(); outputProperties = new Dictionary(); } @@ -425,7 +426,7 @@ public virtual void SetOutput(TextWriter writer) { if (writer is null) { - output = new StreamWriter(Console.OpenStandardOutput()); + output = Console.Out; } else { @@ -453,6 +454,10 @@ public virtual void SetOutput(TextWriter writer) /// public virtual void SetPrefix(string uri, string prefix) { + // LUCENENET: Added guard clause + if (uri is null) + throw new ArgumentNullException(nameof(uri)); + prefixTable[uri] = prefix; } @@ -464,7 +469,11 @@ public virtual void SetPrefix(string uri, string prefix) /// public virtual string GetPrefix(string uri) { - return (string)(prefixTable.ContainsKey(uri) ? prefixTable[uri] : string.Empty); + // LUCENENET: Added guard clause + if (uri is null) + throw new ArgumentNullException(nameof(uri)); + + return prefixTable.TryGetValue(uri, out string value) ? value : string.Empty; } /// @@ -488,6 +497,10 @@ public virtual string GetPrefix(string uri) /// public virtual void ForceNSDecl(string uri) { + // LUCENENET: Added guard clause + if (uri is null) + throw new ArgumentNullException(nameof(uri)); + forcedDeclTable[uri] = true; } @@ -509,6 +522,10 @@ public virtual void ForceNSDecl(string uri) /// public virtual void ForceNSDecl(string uri, string prefix) { + // LUCENENET: Added guard clause + if (uri is null) + throw new ArgumentNullException(nameof(uri)); + SetPrefix(uri, prefix); ForceNSDecl(uri); } @@ -583,7 +600,7 @@ public override void EndDocument() } catch (Exception e) when (e.IsIOException()) { - throw new SAXException(e.Message, e); + throw new SAXException(e.ToString(), e); } } @@ -616,6 +633,10 @@ public override void EndDocument() /// public override void StartElement(string uri, string localName, string qName, IAttributes atts) { + // LUCENENET: Added guard clause + if (atts is null) + throw new ArgumentNullException(nameof(atts)); + elementLevel++; nsSupport.PushContext(); if (forceDTD && !hasOutputDTD) @@ -1173,16 +1194,14 @@ private string DoPrefix(string uri, string qName, bool isElement) { return prefix; } - bool containsPrefix = doneDeclTable.ContainsKey(uri); - prefix = (string)(containsPrefix ? doneDeclTable[uri] : null); + bool containsPrefix = doneDeclTable.TryGetValue(uri, out prefix); if (containsPrefix && ((!isElement || defaultNS != null) && "".Equals(prefix, StringComparison.Ordinal) || nsSupport.GetUri(prefix) != null)) { prefix = null; } if (prefix is null) { - containsPrefix = prefixTable.ContainsKey(uri); - prefix = (string)(containsPrefix ? prefixTable[uri] : null); + containsPrefix = prefixTable.TryGetValue(uri, out prefix); if (containsPrefix && ((!isElement || defaultNS != null) && "".Equals(prefix, StringComparison.Ordinal) || nsSupport.GetUri(prefix) != null)) { @@ -1549,9 +1568,9 @@ public virtual void SetOutputProperty(string key, string value) "selected" }; - private Hashtable prefixTable; - private Hashtable forcedDeclTable; - private Hashtable doneDeclTable; + private IDictionary prefixTable; + private IDictionary forcedDeclTable; + private IDictionary doneDeclTable; private int elementLevel = 0; private TextWriter output; private NamespaceSupport nsSupport;