Skip to content

Commit

Permalink
SWEEP: Lucene.Net.Benchmark.Support.TagSoup: Reviewed API for accessi…
Browse files Browse the repository at this point in the history
…bility issues. Fixed error handling and guard clauses. Changed to generic collections. Renamed method arguments.
  • Loading branch information
NightOwl888 committed Oct 17, 2022
1 parent 878dc64 commit fef0181
Show file tree
Hide file tree
Showing 9 changed files with 427 additions and 242 deletions.
39 changes: 39 additions & 0 deletions src/Lucene.Net.Benchmark/Support/TagSoup/Guard.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using System;

namespace TagSoup
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// LUCENENET specific class to simplify adding guard clause checks to dozens of APIs with the same parameters
/// <summary>
/// Shared argument validation for methods that accept a buffer together with a
/// start index and a length describing a range inside that buffer.
/// </summary>
internal static class Guard
{
    /// <summary>
    /// Verifies that <paramref name="startIndex"/> and <paramref name="length"/> describe a
    /// range that lies inside <paramref name="buffer"/>; returns normally when they do.
    /// </summary>
    /// <typeparam name="T">Element type of the buffer.</typeparam>
    /// <param name="buffer">The array the range refers to.</param>
    /// <param name="startIndex">Index of the first element of the range.</param>
    /// <param name="length">Number of elements in the range.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="startIndex"/> is negative, or <paramref name="length"/> is negative
    /// and the range check has not already failed.
    /// </exception>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="startIndex"/> is greater than <c>buffer.Length - length</c>.
    /// </exception>
    public static void BufferAndRangeCheck<T>(T[] buffer, int startIndex, int length)
    {
        // The sequence of the checks below is deliberate: it is the order the
        // Apache Harmony tests expect, so do not rearrange them.
        if (startIndex < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(startIndex), startIndex, $"{nameof(startIndex)} must not be negative.");
        }

        if (buffer is null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }

        // Expressed as a subtraction so that startIndex + length is never computed
        // (guards against int overflow of that sum).
        int highestAllowedStart = buffer.Length - length;
        if (startIndex > highestAllowedStart)
        {
            throw new ArgumentException($"{nameof(startIndex)} + {nameof(length)} may not be greater than the size of {nameof(buffer)}");
        }

        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), length, $"{nameof(length)} must not be negative.");
        }
    }
}
}
38 changes: 20 additions & 18 deletions src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
//
// TagSoup is licensed under the Apache License,
// Version 2.0. You may obtain a copy of this license at
Expand All @@ -13,11 +13,10 @@
//
//

using Lucene;
using Sax;
using System;
using System.Globalization;
using System.IO;
using System.Text;

namespace TagSoup
{
Expand Down Expand Up @@ -297,16 +296,18 @@ public class HTMLScanner : IScanner, ILocator
/// next state = statetable[value + 3]. That is, the value points
/// to the start of the answer 4-tuple in the statetable.
/// </summary>
private static short[][] statetableIndex;
private static readonly short[][] statetableIndex = LoadStateTableIndex(ref statetableIndexMaxChar); // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)

/// <summary>
/// The highest character value seen in the statetable.
/// See the doc comment for statetableIndex to see how this
/// is used.
/// </summary>
private static int statetableIndexMaxChar;
public HTMLScanner()

private static short[][] LoadStateTableIndex(ref int statetableIndexMaxChar)
{
short[][] result;
int maxState = -1;
int maxChar = -1;
for (int i = 0; i < statetable.Length; i += 4)
Expand All @@ -322,11 +323,11 @@ public HTMLScanner()
}
statetableIndexMaxChar = maxChar + 1;

statetableIndex = new short[maxState + 1][];
result = new short[maxState + 1][];

for (int i = 0; i <= maxState; i++)
{
statetableIndex[i] = new short[maxChar + 3];
result[i] = new short[maxChar + 3];
}
for (int theState = 0; theState <= maxState; ++theState)
{
Expand All @@ -353,9 +354,10 @@ public HTMLScanner()
break;
}
}
statetableIndex[theState][ch + 2] = (short)hit;
result[theState][ch + 2] = (short)hit;
}
}
return result;
}

// Locator implementation
Expand All @@ -372,14 +374,14 @@ public HTMLScanner()
// Scanner implementation

/// <summary>
/// Reset document locator, supplying systemid and publicid.
/// Reset document locator, supplying systemId and publicId.
/// </summary>
/// <param name="systemid">System id</param>
/// <param name="publicid">Public id</param>
public virtual void ResetDocumentLocator(string publicid, string systemid)
/// <param name="systemId">System id</param>
/// <param name="publicId">Public id</param>
public virtual void ResetDocumentLocator(string publicId, string systemId)
{
thePublicid = publicid;
theSystemid = systemid;
thePublicid = publicId;
theSystemid = systemId;
theLastLine = theLastColumn = theCurrentLine = theCurrentColumn = 0;
}

Expand Down Expand Up @@ -440,9 +442,9 @@ public virtual void Scan(TextReader r, IScanHandler h)
switch (action)
{
case 0:
throw new Exception(
"HTMLScanner can't cope with " + (int)ch + " in state " +
(int)theState);
throw Error.Create(
"HTMLScanner can't cope with " + ch + " in state " +
theState);
case A_ADUP:
h.Adup(theOutputBuffer, 0, theSize);
theSize = 0;
Expand Down Expand Up @@ -668,7 +670,7 @@ public virtual void Scan(TextReader r, IScanHandler h)
theSize = 0;
break;
default:
throw new Exception("Can't process state " + action);
throw Error.Create("Can't process state " + action);
}
if (!unread)
{
Expand Down
50 changes: 28 additions & 22 deletions src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,82 +38,88 @@ namespace TagSoup
/// </summary>
public class PYXScanner : IScanner
{
public virtual void ResetDocumentLocator(string publicid, string systemid)
public virtual void ResetDocumentLocator(string publicId, string systemId)
{
// Need this method for interface compatibility, but note
// that PYXScanner does not implement ILocator.
}

public virtual void Scan(TextReader br, IScanHandler h)
{
// LUCENENET: Added guard clauses
if (br is null)
throw new ArgumentNullException(nameof(br));
if (h is null)
throw new ArgumentNullException(nameof(h));

string s;
char[] buff = null;
char[] buffer = null;
bool instag = false;
while ((s = br.ReadLine()) != null)
{
int size = s.Length;
buff = s.ToCharArray(0, size);
if (buff.Length < size)
buffer = s.ToCharArray(0, size);
if (buffer.Length < size)
{
buff = new char[size];
buffer = new char[size];
}
switch (buff[0])
switch (buffer[0])
{
case '(':
if (instag)
{
h.STagC(buff, 0, 0);
h.STagC(buffer, 0, 0);
//instag = false; // LUCENENET: IDE0059: Remove unnecessary value assignment
}
h.GI(buff, 1, size - 1);
h.GI(buffer, 1, size - 1);
instag = true;
break;
case ')':
if (instag)
{
h.STagC(buff, 0, 0);
h.STagC(buffer, 0, 0);
instag = false;
}
h.ETag(buff, 1, size - 1);
h.ETag(buffer, 1, size - 1);
break;
case '?':
if (instag)
{
h.STagC(buff, 0, 0);
h.STagC(buffer, 0, 0);
instag = false;
}
h.PI(buff, 1, size - 1);
h.PI(buffer, 1, size - 1);
break;
case 'A':
int sp = s.IndexOf(' ');
h.Aname(buff, 1, sp - 1);
h.Aval(buff, sp + 1, size - sp - 1);
h.Aname(buffer, 1, sp - 1);
h.Aval(buffer, sp + 1, size - sp - 1);
break;
case '-':
if (instag)
{
h.STagC(buff, 0, 0);
h.STagC(buffer, 0, 0);
instag = false;
}
if (s.Equals("-\\n", StringComparison.Ordinal))
{
buff[0] = '\n';
h.PCDATA(buff, 0, 1);
buffer[0] = '\n';
h.PCDATA(buffer, 0, 1);
}
else
{
// FIXME:
// Does not decode \t and \\ in input
h.PCDATA(buff, 1, size - 1);
h.PCDATA(buffer, 1, size - 1);
}
break;
case 'E':
if (instag)
{
h.STagC(buff, 0, 0);
h.STagC(buffer, 0, 0);
instag = false;
}
h.Entity(buff, 1, size - 1);
h.Entity(buffer, 1, size - 1);
break;
default:
// System.err.print("Gotcha ");
Expand All @@ -122,10 +128,10 @@ public virtual void Scan(TextReader br, IScanHandler h)
break;
}
}
h.EOF(buff, 0, 0);
h.EOF(buffer, 0, 0);
}

public void StartCDATA()
public virtual void StartCDATA()
{
}

Expand Down
Loading

0 comments on commit fef0181

Please sign in to comment.