نحوه حذف ایموجی‌ها از متن در .NET

در ادامه روش‌هایی را آورده‌ام که امتحان کردم اما درست کار نکردند.
برای پیدا کردن یک عبارت منظم (Regex) مناسب در اینترنت جستجو کردم و این الگوی JavaScript را پیدا کردم:
const emojiRegex = /[\u{1F300}-\u{1F5FF}\u{1F900}-\u{1F9FF}\u{1F600}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}\u{1F191}-\u{1F251}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F190}-\u{1F19A}]/gu;
سپس با کمک هوش مصنوعی آن را به C# تبدیل کردم:
string emojiPattern = @"[\u1F300-\u1F5FF\u1F900-\u1F9FF\u1F600-\u1F64F\u1F680-\u1F6FF\u2600-\u26FF\u2700-\u27BF\u1F1E6-\u1F1FF\u1F191-\u1F251\u1F004\u1F0CF\u1F170-\u1F171\u1F17E-\u1F17F\u1F18E\u1F190-\u1F19A]";
این الگو را با متن زیر آزمایش کردم:
🚀 𝙶ςς𝚍𝚖ς𝚗𝚒𝚗𝚐 The best way to ruin a song is to set it as an alarm⓪ The sun and I compete to see who wakes up earlier 𝓰𝓸𝓸𝒹 𝓂𝓸𝓇𝓃𝒾𝓃𝓰 ... 🌟
خروجی برنامه پس از اجرا با `dotnet run`:
🚀 𝙶ςς𝚍𝚖ς𝚗𝚒𝚗𝚐 The best way to ruin a song is to set it as an alarm⓪ The sun and I compete to see who wakes up earlier 𝓰𝓸𝓸𝒹 𝓂𝓸𝓇𝓃𝒾𝓃𝓰 ... 🌟
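به نظر می‌رسد هیچ تغییری ایجاد نشده است. توجه کنید که در عبارات منظم .NET، دنبالهٔ `\u` دقیقاً چهار رقم هگزادسیمال می‌پذیرد و نویسه‌های بالاتر از U+FFFF (از جمله بیشتر ایموجی‌ها) در رشته‌های .NET به‌صورت جفت جانشین (surrogate pair) ذخیره می‌شوند؛ در نتیجه این الگو با ایموجی‌ها مطابقت نمی‌کند. بنابراین پاسخ دیگری در Stack Overflow پیدا کردم که پیشنهاد می‌کرد از این الگو استفاده کنم: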
string emojiPattern = @"\p{Cs}";
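این بار خروجی نشان داد که علاوه بر ایموجی‌ها، سایر نویسه‌های یونیکدِ خارج از BMP (مانند حروف تزئینی ریاضی) نیز حذف شده‌اند، زیرا `\p{Cs}` با همهٔ واحدهای کد جانشین (surrogate) مطابقت می‌کند: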
ςςς The best way to ruin a song is to set it as an alarm⓪ The sun and I compete to see who wakes up earlier ...
روش صحیح برای انجام آن:
using System;
using System.Text;
/// <summary>
/// Strips emoji from text by walking the string one Unicode code point at a time,
/// so that characters stored as surrogate pairs are tested as a whole.
/// </summary>
public static class EmojiRemover
{
    // U+FE0F VARIATION SELECTOR-16: asks for emoji presentation of the preceding character.
    private const int VariationSelector16 = 0xFE0F;

    // U+200D ZERO WIDTH JOINER: glues several emoji into one rendered sequence.
    private const int ZeroWidthJoiner = 0x200D;

    /// <summary>
    /// Returns a copy of <paramref name="input"/> with every emoji code point removed.
    /// </summary>
    /// <param name="input">The text to filter. May be <c>null</c> or empty.</param>
    /// <returns>
    /// <paramref name="input"/> itself when it is <c>null</c> or empty; otherwise a new
    /// string without the code points matched by the emoji ranges of this class.
    /// </returns>
    /// <remarks>
    /// A variation selector (U+FE0F) or zero width joiner (U+200D) that directly follows
    /// a removed emoji is removed with it, so no invisible leftovers stay in the text.
    /// The same characters are kept when they do not follow a removed emoji.
    /// Unpaired surrogate code units are dropped because they are not valid text.
    /// </remarks>
    public static string RemoveEmojis(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        var result = new StringBuilder(input.Length);

        // True while the code point just processed was removed as (part of) an emoji.
        bool insideRemovedSequence = false;

        int i = 0;
        while (i < input.Length)
        {
            // Number of UTF-16 code units occupied by the code point starting at i.
            int width;
            if (!char.IsSurrogate(input[i]))
            {
                width = 1;
            }
            else if (i + 1 < input.Length && char.IsSurrogatePair(input[i], input[i + 1]))
            {
                width = 2;
            }
            else
            {
                // Unpaired surrogate: skip it instead of copying invalid UTF-16.
                insideRemovedSequence = false;
                i++;
                continue;
            }

            int codePoint = char.ConvertToUtf32(input, i);
            bool isEmojiGlue = codePoint == VariationSelector16 || codePoint == ZeroWidthJoiner;

            if (IsUnicodeEmoji(codePoint) || (insideRemovedSequence && isEmojiGlue))
            {
                // Drop the code point and remember that we are inside an emoji sequence.
                insideRemovedSequence = true;
            }
            else
            {
                result.Append(input, i, width);
                insideRemovedSequence = false;
            }

            i += width;
        }

        return result.ToString();
    }

    /// <summary>
    /// Tells whether <paramref name="codePoint"/> lies in one of the ranges this class
    /// treats as emoji.
    /// </summary>
    /// <param name="codePoint">A Unicode code point (UTF-32 value).</param>
    /// <returns><c>true</c> when the code point is inside any listed range.</returns>
    private static bool IsUnicodeEmoji(int codePoint)
    {
        return (0x1F600 <= codePoint && codePoint <= 0x1F64F) || // Emoticons
               (0x1F300 <= codePoint && codePoint <= 0x1F5FF) || // Miscellaneous Symbols and Pictographs
               (0x1F680 <= codePoint && codePoint <= 0x1F6FF) || // Transport and Map Symbols
               (0x1F1E0 <= codePoint && codePoint <= 0x1F1FF) || // Regional indicators (flags)
               (0x1F900 <= codePoint && codePoint <= 0x1F9FF) || // Supplemental Symbols and Pictographs
               (0x1FA70 <= codePoint && codePoint <= 0x1FAFF) || // Symbols and Pictographs Extended-A
               (0x1F004 <= codePoint && codePoint <= 0x1F0FF) || // Mahjong red dragon through playing cards
               (0x2600 <= codePoint && codePoint <= 0x26FF) ||   // Miscellaneous Symbols
               (0x2700 <= codePoint && codePoint <= 0x27BF) ||   // Dingbats
               (0x1F700 <= codePoint && codePoint <= 0x1F77F) || // Alchemical Symbols
               (0x1F780 <= codePoint && codePoint <= 0x1F7FF) || // Geometric Shapes Extended
               (0x1F800 <= codePoint && codePoint <= 0x1F8FF);   // Supplemental Arrows-C
    }
}
این بار نتیجهٔ اجرا درست است:
𝙶ςς𝚍𝚖ς𝚗𝚒𝚗𝚐 The best way to ruin a song is to set it as an alarm⓪ The sun and I compete to see who wakes up earlier 𝓰𝓸𝓸𝒹 𝓂𝓸𝓇𝓃𝒾𝓃𝓰 ...