Skip to content

Commit

Permalink
Fixed encoding provider loading on .NET Framework (for .NET Standard …
Browse files Browse the repository at this point in the history
…2.0 target) (apache#1036)

* Lucene.Net.Analysis.Ja.Tools.ConnectionCostsWriter: Added using for Lucene.Net.Support

* Lucene.Net.Analysis.Kuromoji + Lucene.Net.Analysis.SmartCn: Added EncodingProviderInitializer classes to ensure we don't load the encoding provider on a .NET Framework runtime when targeting netstandard2.0 (fixes apache#1025)
  • Loading branch information
NightOwl888 authored Nov 19, 2024
1 parent 3989c5a commit 2d4d332
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 18 deletions.
7 changes: 2 additions & 5 deletions src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,8 @@ public JapaneseTokenizerFactory(IDictionary<string, string> args)

static JapaneseTokenizerFactory()
{
    // LUCENENET: Support for EUC-JP encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
    //
    // Registration is delegated to EncodingProviderInitializer instead of touching
    // System.Text.CodePagesEncodingProvider here, because the initializer checks which
    // runtime we are on first and skips registration on .NET Framework. Referencing the
    // provider directly in this static constructor would bypass that check.
    EncodingProviderInitializer.EnsureInitialized();
}

public virtual void Inform(IResourceLoader loader)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;

namespace Lucene.Net.Util
{
    /// <summary>
    /// Loads the <see cref="System.Text.EncodingProvider"/> for the current runtime for support of
    /// EUC-JP encoding.
    /// <para/>
    /// Registration is attempted only when the <c>FEATURE_ENCODINGPROVIDERS</c> compilation symbol is
    /// defined and the process is not running on .NET Framework (see <see cref="IsNetFramework"/>).
    /// </summary>
    internal static class EncodingProviderInitializer
    {
        // Guards the one-time registration so that concurrent callers block until it has finished.
        private static readonly object syncLock = new object();

        // 0 = registration not yet completed, 1 = completed. Only accessed while holding syncLock.
        private static int initialized;

        /// <summary>
        /// Gets a value indicating whether the current runtime is .NET Framework.
        /// When compiled for <c>netstandard2.0</c> this is detected at runtime from
        /// <see cref="RuntimeInformation.FrameworkDescription"/>; otherwise it is a compile-time constant.
        /// </summary>
        private static bool IsNetFramework =>
#if NETSTANDARD2_0
            RuntimeInformation.FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase);
#elif NET40_OR_GREATER
            true;
#else
            false;
#endif

        /// <summary>
        /// Ensures the code pages encoding provider has been registered (at most once) before returning.
        /// <para/>
        /// Calls to this method are removed by the compiler when <c>FEATURE_ENCODINGPROVIDERS</c> is
        /// not defined. If registration throws, the exception propagates to the caller and the
        /// next call will try again.
        /// </summary>
        [Conditional("FEATURE_ENCODINGPROVIDERS")]
        public static void EnsureInitialized()
        {
            // A lock (rather than an interlocked flag) is used so that a caller that arrives while
            // another thread is still registering waits for it to finish, instead of returning
            // early and requesting an encoding that is not yet available.
            lock (syncLock)
            {
                if (initialized != 0)
                {
                    return;
                }

#if FEATURE_ENCODINGPROVIDERS
                if (!IsNetFramework)
                {
                    Initialize();
                }
#endif

                // Mark as done only after the work above succeeded, so a failure can be retried.
                initialized = 1;
            }
        }

#if FEATURE_ENCODINGPROVIDERS
        // NOTE: CodePagesEncodingProvider.Instance loads early, so we need this in a separate method to ensure
        // that it isn't executed until after we know which runtime we are on.
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static void Initialize()
        {
            // Support for EUC-JP encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        }
#endif
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Lucene.Net.Codecs;
using Lucene.Net.Diagnostics;
using Lucene.Net.Store;
using Lucene.Net.Support;
using System.Diagnostics;
using System.IO;

Expand Down Expand Up @@ -37,7 +38,7 @@ public ConnectionCostsWriter(int forwardSize, int backwardSize)
this.forwardSize = forwardSize;
this.backwardSize = backwardSize;
//this.costs = new short[backwardSize][forwardSize];
this.costs = Support.RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);
this.costs = RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);
}

public void Add(int forwardId, int backwardId, int cost)
Expand Down
10 changes: 4 additions & 6 deletions src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using Lucene.Net.Util;
using System;
using Console = Lucene.Net.Util.SystemConsole;

namespace Lucene.Net.Analysis.Ja.Util
Expand Down Expand Up @@ -35,11 +36,8 @@ public enum DictionaryFormat { IPADIC, UNIDIC };

static DictionaryBuilder()
{
    // LUCENENET: Support for EUC-JP encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
    //
    // Registration is delegated to EncodingProviderInitializer instead of touching
    // System.Text.CodePagesEncodingProvider here, because the initializer checks which
    // runtime we are on first and skips registration on .NET Framework. Referencing the
    // provider directly in this static constructor would bypass that check.
    EncodingProviderInitializer.EnsureInitialized();
}

public static void Build(DictionaryFormat format,
Expand Down
7 changes: 2 additions & 5 deletions src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,8 @@ static AnalyzerProfile()
// from ever being loaded).
private static void Init()
{
#if FEATURE_ENCODINGPROVIDERS
// Support for GB2312 encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
var encodingProvider = System.Text.CodePagesEncodingProvider.Instance;
System.Text.Encoding.RegisterProvider(encodingProvider);
#endif
// LUCENENET: Support for GB2312 encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
EncodingProviderInitializer.EnsureInitialized();

string dirName = "smartcn-data";
//string propName = "analysis.properties";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;

namespace Lucene.Net.Util
{
    /// <summary>
    /// Loads the <see cref="System.Text.EncodingProvider"/> for the current runtime for support of
    /// GB2312 encoding.
    /// <para/>
    /// Registration is attempted only when the <c>FEATURE_ENCODINGPROVIDERS</c> compilation symbol is
    /// defined and the process is not running on .NET Framework (see <see cref="IsNetFramework"/>).
    /// </summary>
    internal static class EncodingProviderInitializer
    {
        // Guards the one-time registration so that concurrent callers block until it has finished.
        private static readonly object syncLock = new object();

        // 0 = registration not yet completed, 1 = completed. Only accessed while holding syncLock.
        private static int initialized;

        /// <summary>
        /// Gets a value indicating whether the current runtime is .NET Framework.
        /// When compiled for <c>netstandard2.0</c> this is detected at runtime from
        /// <see cref="RuntimeInformation.FrameworkDescription"/>; otherwise it is a compile-time constant.
        /// </summary>
        private static bool IsNetFramework =>
#if NETSTANDARD2_0
            RuntimeInformation.FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase);
#elif NET40_OR_GREATER
            true;
#else
            false;
#endif

        /// <summary>
        /// Ensures the code pages encoding provider has been registered (at most once) before returning.
        /// <para/>
        /// Calls to this method are removed by the compiler when <c>FEATURE_ENCODINGPROVIDERS</c> is
        /// not defined. If registration throws, the exception propagates to the caller and the
        /// next call will try again.
        /// </summary>
        [Conditional("FEATURE_ENCODINGPROVIDERS")]
        public static void EnsureInitialized()
        {
            // A lock (rather than an interlocked flag) is used so that a caller that arrives while
            // another thread is still registering waits for it to finish, instead of returning
            // early and requesting an encoding that is not yet available.
            lock (syncLock)
            {
                if (initialized != 0)
                {
                    return;
                }

#if FEATURE_ENCODINGPROVIDERS
                if (!IsNetFramework)
                {
                    Initialize();
                }
#endif

                // Mark as done only after the work above succeeded, so a failure can be retried.
                initialized = 1;
            }
        }

#if FEATURE_ENCODINGPROVIDERS
        // NOTE: CodePagesEncodingProvider.Instance loads early, so we need this in a separate method to ensure
        // that it isn't executed until after we know which runtime we are on.
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static void Initialize()
        {
            // Support for GB2312 encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        }
#endif
    }
}
7 changes: 6 additions & 1 deletion src/Lucene.Net.Tests.Analysis.Common/Startup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ protected override void TestFrameworkSetUp()
// require it to be added as well when using Hunspell, but there is no reason to load
// the code pages by default in Lucene.Net.Analysis.Common. It should be added by consumers
// or Hunspell that require it.
//
// Note this is in the test project, which never uses netstandard2.0. If we were using
// netstandard2.0, we would need an extra check to determine if we are on .NET Framework,
// which doesn't support encoding providers. See EncodingProviderInitializer in the
// Lucene.Net.Analysis.Kuromoji project.
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
#endif
}
}
}

0 comments on commit 2d4d332

Please sign in to comment.