In .NET, the System.Text namespace
provides very powerful classes for working with strings. Many times we need to remove Unicode punctuation from a string. There are many techniques to do this; one of them is given below.
/// <summary>
/// Removes every Unicode punctuation character from the given text.
/// </summary>
/// <remarks>
/// A character is treated as punctuation when <c>char.IsPunctuation</c> reports it as such,
/// i.e. when its Unicode category is connector, dash, open, close, initial-quote,
/// final-quote, or other punctuation. Symbols such as <c>$</c> or <c>+</c> are not
/// punctuation and are kept.
/// The text is normalized to <see cref="NormalizationForm.FormD"/> before filtering and the
/// result is returned in that decomposed form, so precomposed letters (for example an
/// accented letter) come back as a base letter followed by combining marks.
/// Characters are examined one UTF-16 code unit at a time.
/// </remarks>
/// <param name="text">
/// The text to remove punctuation from. Must not be <c>null</c>.
/// </param>
/// <returns>
/// The FormD-normalized text with all punctuation characters removed.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="text"/> is <c>null</c>.
/// </exception>
private static string RemoveUnicodePunctuation(string text)
{
    if (text == null)
    {
        throw new System.ArgumentNullException("text");
    }

    var normalized = text.Normalize(NormalizationForm.FormD);

    // The result can never be longer than the normalized input, so presize the builder.
    var sb = new StringBuilder(normalized.Length);

    foreach (var c in normalized)
    {
        // char.IsPunctuation covers all Unicode punctuation categories, not only the
        // initial/final quote categories.
        if (!char.IsPunctuation(c))
        {
            sb.Append(c);
        }
    }

    return sb.ToString();
}