Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Gen5/3DS/Switch word filters #4423

Merged
merged 6 commits into from
Jan 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 0 additions & 95 deletions PKHeX.Core/Legality/Restrictions/WordFilter.cs

This file was deleted.

70 changes: 70 additions & 0 deletions PKHeX.Core/Legality/Restrictions/WordFilter/TextNormalizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
using System;

namespace PKHeX.Core;

/// <summary>
/// Simplistic normalization of a string used by the Nintendo 3DS and Nintendo Switch games.
/// </summary>
/// <remarks>
/// Every width-sensitive code point (halfwidth kana, halfwidth voicing marks, fullwidth digits/letters)
/// is written as a <c>\uXXXX</c> escape so the logic survives editors or tools that apply Unicode
/// compatibility normalization to the source text.
/// </remarks>
public static class TextNormalizer
{
    // Fullwidth katakana indexed by (halfwidth code point - U+FF66); covers U+FF66..U+FF9D (56 entries).
    private const string FullwidthKana = "ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン";
    private const string SmallKana = "ァィゥェォッャュョヮ"; // 'ヵ', 'ヶ' handled separately

    private const char IdeographicSpace = '\u3000';
    private const char HalfwidthKanaFirst = '\uFF66';  // HALFWIDTH KATAKANA LETTER WO
    private const char HalfwidthKanaLast = '\uFF9D';   // HALFWIDTH KATAKANA LETTER N
    private const char HalfwidthU = '\uFF73';          // HALFWIDTH KATAKANA LETTER U
    private const char HalfwidthDakuten = '\uFF9E';    // HALFWIDTH KATAKANA VOICED SOUND MARK
    private const char HalfwidthHandakuten = '\uFF9F'; // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK

    private const int HiraganaToKatakana = 0x60;       // U+3041.. -> U+30A1..
    private const int FullwidthToAscii = 0xFEE0;       // U+FF10.. -> U+0030.., U+FF41.. -> U+0061..
    private const int VuOffset = '\u30F4' - '\u30A6';  // fullwidth VU - fullwidth U = 0x4E (78)

    /// <summary>
    /// Normalize a string to a simplified form for checking against a bad-word list.
    /// </summary>
    /// <param name="input">Input string to normalize</param>
    /// <param name="output">Output buffer to write the normalized string; must be at least as long as <paramref name="input"/>.</param>
    /// <returns>Count of characters written to <paramref name="output"/>.</returns>
    public static int Normalize(ReadOnlySpan<char> input, Span<char> output)
    {
        int ctr = 0;
        for (int i = 0; i < input.Length; i++)
        {
            var c = input[i];

            // Skip spaces and halfwidth dakuten/handakuten.
            // A voicing mark is merged into the preceding kana below, so it is never emitted by itself.
            if (c is ' ' or IdeographicSpace or HalfwidthDakuten or HalfwidthHandakuten)
                continue;

            // Handle combining halfwidth dakuten/handakuten: peek at the next character.
            int ofs = 0;
            if (c is >= HalfwidthKanaFirst and <= HalfwidthKanaLast && i + 1 < input.Length)
                ofs = GetVoicingOffset(c, input[i + 1]);

            // Fold characters treated identically
            c = char.ToLowerInvariant(c); // fold to lowercase (also folds fullwidth Latin capitals)
            c = (char)(c switch
            {
                >= 'ぁ' and <= 'ゖ' => c + HiraganaToKatakana, // shift hiragana to katakana
                (>= '\uFF10' and <= '\uFF19') or (>= '\uFF41' and <= '\uFF5A') => c - FullwidthToAscii, // shift fullwidth numbers/letters to halfwidth
                >= HalfwidthKanaFirst and <= HalfwidthKanaLast => FullwidthKana[c - HalfwidthKanaFirst] + ofs, // shift halfwidth katakana to fullwidth
                _ => c,
            });

            // Shift small kana to normal kana
            if (c is >= 'ァ' and <= 'ヶ')
            {
                if (SmallKana.Contains(c))
                    c++; // the regular-size kana immediately follows its small form
                else if (c == 'ヵ')
                    c = 'カ';
                else if (c == 'ヶ')
                    c = 'ケ';
            }

            output[ctr] = c;
            ctr++;
        }
        return ctr;
    }

    /// <summary>
    /// Gets the offset to add to the fullwidth form of a halfwidth kana when it is followed by a halfwidth voicing mark.
    /// </summary>
    /// <param name="kana">Halfwidth katakana being converted.</param>
    /// <param name="mark">Character that follows <paramref name="kana"/> in the input.</param>
    /// <returns>Offset to the voiced/semi-voiced fullwidth kana, or 0 when no mark applies.</returns>
    private static int GetVoicingOffset(char kana, char mark) => mark switch
    {
        // KA..TO and HA..HO rows: the voiced form directly follows the base kana.
        HalfwidthDakuten when kana is (>= '\uFF76' and <= '\uFF84') or (>= '\uFF8A' and <= '\uFF8E') => 1,
        // HA..HO row: the semi-voiced form is two past the base kana.
        HalfwidthHandakuten when kana is >= '\uFF8A' and <= '\uFF8E' => 2,
        // U + dakuten = VU, which lives far from U in the katakana block.
        HalfwidthDakuten when kana == HalfwidthU => VuOffset,
        _ => 0,
    };
}
116 changes: 116 additions & 0 deletions PKHeX.Core/Legality/Restrictions/WordFilter/WordFilter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;

namespace PKHeX.Core;

/// <summary>
/// Bad-word Filter class containing logic to check against unsavory regular expressions.
/// </summary>
public static class WordFilter
{
    // if you're running this as a server and don't mind a few extra seconds of startup, add RegexOptions.Compiled for slightly better checking.
    private const RegexOptions Options = RegexOptions.CultureInvariant;

    // Largest message (in chars) normalized on the stack; longer inputs use a heap buffer instead.
    private const int MaxStackChars = 256;

    /// <summary>
    /// Builds one <see cref="Regex"/> per non-empty line of the pattern list.
    /// </summary>
    /// <param name="patterns">Line-separated list of regular expression patterns.</param>
    /// <returns>Regex objects in the same order as the input lines, built from the lowercased (invariant) pattern text.</returns>
    /// <remarks>
    /// Blank lines are skipped: an empty pattern matches every input and would flag every phrase.
    /// The pattern list can be large, so it is processed line-by-line rather than copied to the stack.
    /// </remarks>
    internal static Regex[] LoadPatterns(ReadOnlySpan<char> patterns)
    {
        // First pass: count with the same enumerator used to fill, so the size can never disagree with the content.
        int count = 0;
        foreach (var line in patterns.EnumerateLines())
        {
            if (!line.IsEmpty)
                count++;
        }

        var result = new Regex[count];
        int i = 0;
        foreach (var line in patterns.EnumerateLines())
        {
            if (line.IsEmpty)
                continue;
            // Make it lowercase invariant
            result[i++] = new Regex(line.ToString().ToLowerInvariant(), Options);
        }
        return result;
    }

    /// <summary>
    /// Checks to see if a phrase contains filtered content.
    /// </summary>
    /// <param name="message">Phrase to check</param>
    /// <param name="regexes">Console regex set to check against.</param>
    /// <param name="regMatch">Matching regex that filters the phrase.</param>
    /// <returns>Boolean result if the message is filtered or not.</returns>
    internal static bool TryMatch(ReadOnlySpan<char> message, ReadOnlySpan<Regex> regexes, [NotNullWhen(true)] out string? regMatch)
    {
        // Clean the string; normalization never produces more characters than it consumes.
        Span<char> buffer = message.Length <= MaxStackChars
            ? stackalloc char[message.Length]
            : new char[message.Length];
        int length = TextNormalizer.Normalize(message, buffer);
        ReadOnlySpan<char> clean = buffer[..length];

        foreach (var regex in regexes)
        {
            if (!regex.IsMatch(clean))
                continue;
            regMatch = regex.ToString(); // the Regex retains its pattern text
            return true;
        }
        regMatch = null;
        return false;
    }

    /// <inheritdoc cref="IsFiltered(ReadOnlySpan{char}, out string?, EntityContext, EntityContext)"/>
    public static bool IsFiltered(ReadOnlySpan<char> message, [NotNullWhen(true)] out string? regMatch,
        EntityContext current)
        => IsFiltered(message, out regMatch, current, current);

    /// <summary>
    /// Checks to see if a phrase contains filtered content.
    /// </summary>
    /// <param name="message">Phrase to check for</param>
    /// <param name="regMatch">Matching regex that filters the phrase.</param>
    /// <param name="current">Current context to check.</param>
    /// <param name="original">Earliest context to check.</param>
    /// <returns>Boolean result if the message is filtered or not.</returns>
    public static bool IsFiltered(ReadOnlySpan<char> message, [NotNullWhen(true)] out string? regMatch,
        EntityContext current, EntityContext original)
    {
        regMatch = null;
        // Blank or single-character phrases are never reported as filtered.
        if (message.IsWhiteSpace() || message.Length <= 1)
            return false;

        // Only check against the single filter if requested
        if (ParseSettings.Settings.WordFilter.DisableWordFilterPastGen)
            return IsFilteredCurrentOnly(message, ref regMatch, current, original);

        return IsFilteredLookBack(message, out regMatch, current, original);
    }

    /// <summary>
    /// Checks the phrase against only the filter that applies to the <paramref name="current"/> context.
    /// </summary>
    /// <remarks>
    /// <paramref name="regMatch"/> is left untouched when the context has no filter handled here.
    /// </remarks>
    private static bool IsFilteredCurrentOnly(ReadOnlySpan<char> message, ref string? regMatch,
        EntityContext current, EntityContext original) => current switch
    {
        EntityContext.Gen5 => WordFilter5.IsFiltered(message, out regMatch),

        EntityContext.Gen6 => WordFilter3DS.IsFilteredGen6(message, out regMatch),
        // Gen7 entity that originated in Gen6 is checked with the Gen6 rules.
        EntityContext.Gen7 when original is EntityContext.Gen6
            => WordFilter3DS.IsFilteredGen6(message, out regMatch),

        EntityContext.Gen7 => WordFilter3DS.IsFilteredGen7(message, out regMatch),
        _ => current.GetConsole() switch
        {
            GameConsole.NX => WordFilterNX.IsFiltered(message, out regMatch, original),
            _ => false,
        },
    };

    /// <summary>
    /// Checks the phrase against the Switch filter first, then the older filters reachable from the <paramref name="original"/> context.
    /// </summary>
    /// <remarks>
    /// <paramref name="current"/> is not consulted here; the checks are driven by <paramref name="original"/> alone.
    /// </remarks>
    private static bool IsFilteredLookBack(ReadOnlySpan<char> message, [NotNullWhen(true)] out string? regMatch,
        EntityContext current, EntityContext original)
    {
        // Switch 2 backwards transfer? Won't know for another couple years.
        if (WordFilterNX.IsFiltered(message, out regMatch, original))
            return true;

        var generation = original.Generation();
        if (generation > 7 || original is EntityContext.Gen7b)
            return false;
        if (WordFilter3DS.IsFiltered(message, out regMatch, original))
            return true;

        return generation == 5 && WordFilter5.IsFiltered(message, out regMatch);
        // no other word filters (none in Gen3 or Gen4)
    }
}
91 changes: 91 additions & 0 deletions PKHeX.Core/Legality/Restrictions/WordFilter/WordFilter3DS.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
using System;
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;

namespace PKHeX.Core;

/// <summary>
/// Word filter for 3DS games.
/// </summary>
public static class WordFilter3DS
{
    /// <summary>
    /// Regex patterns to check against
    /// </summary>
    /// <remarks>No need to keep the original pattern strings around; the <see cref="Regex"/> object retrieves this via <see cref="Regex.ToString()"/></remarks>
    private static readonly Regex[] Regexes = WordFilter.LoadPatterns(Util.GetStringResource("badwords_3ds"));

    /// <summary>
    /// Cache of previously checked phrases. The value is the pattern that matched, or <see langword="null"/> when no pattern matched.
    /// </summary>
    private static readonly ConcurrentDictionary<string, string?>.AlternateLookup<ReadOnlySpan<char>> Lookup =
        new ConcurrentDictionary<string, string?>().GetAlternateLookup<ReadOnlySpan<char>>();

    private const int MAX_COUNT = (1 << 17) - 1; // arbitrary cap for max dictionary size

    /// <inheritdoc cref="IsFiltered"/>
    /// <remarks>Generation 6 is case-sensitive.</remarks>
    public static bool IsFilteredGen6(ReadOnlySpan<char> message, [NotNullWhen(true)] out string? regMatch)
        => IsFiltered(message, out regMatch, EntityContext.Gen6);

    /// <inheritdoc cref="IsFiltered"/>
    /// <remarks>Generation 7 is case-insensitive.</remarks>
    public static bool IsFilteredGen7(ReadOnlySpan<char> message, [NotNullWhen(true)] out string? regMatch)
        => IsFiltered(message, out regMatch, EntityContext.Gen7);

    /// <summary>
    /// Checks to see if a phrase contains filtered content.
    /// </summary>
    /// <param name="message">Phrase to check</param>
    /// <param name="regMatch">Matching regex that filters the phrase.</param>
    /// <param name="original">Earliest context to check.</param>
    /// <returns>Boolean result if the message is filtered or not.</returns>
    public static bool IsFiltered(ReadOnlySpan<char> message, [NotNullWhen(true)] out string? regMatch, EntityContext original)
    {
        regMatch = null;
        // Species names are never reported; this depends on the context, so it runs before the cache.
        if (IsSpeciesName(message, original))
            return false;

        // Check dictionary
        if (Lookup.TryGetValue(message, out regMatch))
            return regMatch is not null;

        // not in dictionary, check patterns
        if (WordFilter.TryMatch(message, Regexes, out regMatch))
        {
            Remember(message, regMatch);
            return true;
        }

        // didn't match any pattern, cache result
        regMatch = null;
        Remember(message, null);
        return false;
    }

    /// <summary>
    /// Stores a check result in the cache, resetting the cache first once it has grown past <see cref="MAX_COUNT"/>.
    /// </summary>
    /// <param name="message">Phrase that was checked.</param>
    /// <param name="pattern">Pattern that matched, or <see langword="null"/> when none matched.</param>
    private static void Remember(ReadOnlySpan<char> message, string? pattern)
    {
        // Apply the cap to hits and misses alike so the cache size stays bounded either way.
        if (Lookup.Dictionary.Count > MAX_COUNT)
            Lookup.Dictionary.Clear(); // reset
        Lookup.TryAdd(message, pattern);
    }

    /// <summary>
    /// Check if the message is a species name
    /// </summary>
    /// <param name="message">Phrase to check</param>
    /// <param name="original">Earliest context to check.</param>
    /// <returns><see langword="true"/> when the phrase is a species name available in the relevant generation.</returns>
    public static bool IsSpeciesName(ReadOnlySpan<char> message, EntityContext original)
    {
        // Gen6 is case-sensitive, Gen7 is case-insensitive.
        if (original is EntityContext.Gen6) // Match case
            return IsSpeciesNameGen6(message);
        return IsSpeciesNameGen7(message);
    }

    private static bool IsSpeciesNameGen7(ReadOnlySpan<char> message)
    {
        if (!SpeciesName.TryGetSpeciesAnyLanguageCaseInsensitive(message, out var s7, 7))
            return false;
        return s7 <= Legal.MaxSpeciesID_7_USUM;
    }

    private static bool IsSpeciesNameGen6(ReadOnlySpan<char> message)
    {
        if (!SpeciesName.TryGetSpeciesAnyLanguage(message, out var s6, 6))
            return false;
        return s6 <= Legal.MaxSpeciesID_6;
    }
}
Loading