You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
178 lines
5.4 KiB
178 lines
5.4 KiB
using System.Collections.Generic;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace Humanizer.Inflections
|
|
{
|
|
/// <summary>
|
|
/// A container for exceptions to simple pluralization/singularization rules.
|
|
/// Vocabularies.Default contains an extensive list of rules for US English.
|
|
/// At this time, multiple vocabularies and removing existing rules are not supported.
|
|
/// </summary>
|
|
internal class Vocabulary
|
|
{
|
|
internal Vocabulary()
|
|
{
|
|
}
|
|
|
|
private readonly List<Rule> _plurals = new List<Rule>();
|
|
private readonly List<Rule> _singulars = new List<Rule>();
|
|
private readonly List<string> _uncountables = new List<string>();
|
|
|
|
/// <summary>
|
|
/// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people".
|
|
/// </summary>
|
|
/// <param name="singular">The singular form of the irregular word, e.g. "person".</param>
|
|
/// <param name="plural">The plural form of the irregular word, e.g. "people".</param>
|
|
/// <param name="matchEnding">True to match these words on their own as well as at the end of longer words. False, otherwise.</param>
|
|
public void AddIrregular(string singular, string plural, bool matchEnding = true)
|
|
{
|
|
if (matchEnding)
|
|
{
|
|
AddPlural("(" + singular[0] + ")" + singular.Substring(1) + "$", "$1" + plural.Substring(1));
|
|
AddSingular("(" + plural[0] + ")" + plural.Substring(1) + "$", "$1" + singular.Substring(1));
|
|
}
|
|
else
|
|
{
|
|
AddPlural($"^{singular}$", plural);
|
|
AddSingular($"^{plural}$", singular);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed.
|
|
/// </summary>
|
|
/// <param name="word">Word to be added to the list of uncountables.</param>
|
|
public void AddUncountable(string word)
|
|
{
|
|
_uncountables.Add(word.ToLower());
|
|
}
|
|
|
|
/// <summary>
|
|
/// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses"
|
|
/// </summary>
|
|
/// <param name="rule">RegEx to be matched, case insensitive, e.g. "(bus)es$"</param>
|
|
/// <param name="replacement">RegEx replacement e.g. "$1"</param>
|
|
public void AddPlural(string rule, string replacement)
|
|
{
|
|
_plurals.Add(new Rule(rule, replacement));
|
|
}
|
|
|
|
/// <summary>
|
|
/// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index"
|
|
/// </summary>
|
|
/// <param name="rule">RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$""</param>
|
|
/// <param name="replacement">RegEx replacement e.g. "$1ex"</param>
|
|
public void AddSingular(string rule, string replacement)
|
|
{
|
|
_singulars.Add(new Rule(rule, replacement));
|
|
}
|
|
|
|
/// <summary>
|
|
/// Pluralizes the provided input considering irregular words
|
|
/// </summary>
|
|
/// <param name="word">Word to be pluralized</param>
|
|
/// <param name="inputIsKnownToBeSingular">Normally you call Pluralize on singular words; but if you're unsure call it with false</param>
|
|
/// <returns></returns>
|
|
public string Pluralize(string word, bool inputIsKnownToBeSingular = true)
|
|
{
|
|
var result = ApplyRules(_plurals, word, false);
|
|
|
|
if (inputIsKnownToBeSingular)
|
|
{
|
|
return result ?? word;
|
|
}
|
|
|
|
var asSingular = ApplyRules(_singulars, word, false);
|
|
var asSingularAsPlural = ApplyRules(_plurals, asSingular, false);
|
|
if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word)
|
|
{
|
|
return word;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Singularizes the provided input considering irregular words
|
|
/// </summary>
|
|
/// <param name="word">Word to be singularized</param>
|
|
/// <param name="inputIsKnownToBePlural">Normally you call Singularize on plural words; but if you're unsure call it with false</param>
|
|
/// <param name="skipSimpleWords">Skip singularizing single words that have an 's' on the end</param>
|
|
/// <returns></returns>
|
|
public string Singularize(string word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false)
|
|
{
|
|
var result = ApplyRules(_singulars, word, skipSimpleWords);
|
|
|
|
if (inputIsKnownToBePlural)
|
|
{
|
|
return result ?? word;
|
|
}
|
|
|
|
// the Plurality is unknown so we should check all possibilities
|
|
var asPlural = ApplyRules(_plurals, word, false);
|
|
var asPluralAsSingular = ApplyRules(_singulars, asPlural, false);
|
|
if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word)
|
|
{
|
|
return word;
|
|
}
|
|
|
|
return result ?? word;
|
|
}
|
|
|
|
private string ApplyRules(IList<Rule> rules, string word, bool skipFirstRule)
|
|
{
|
|
if (word == null)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
if (word.Length < 1)
|
|
{
|
|
return word;
|
|
}
|
|
|
|
if (IsUncountable(word))
|
|
{
|
|
return word;
|
|
}
|
|
|
|
var result = word;
|
|
var end = skipFirstRule ? 1 : 0;
|
|
for (var i = rules.Count - 1; i >= end; i--)
|
|
{
|
|
if ((result = rules[i].Apply(word)) != null)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
private bool IsUncountable(string word)
|
|
{
|
|
return _uncountables.Contains(word.ToLower());
|
|
}
|
|
|
|
private class Rule
|
|
{
|
|
private readonly Regex _regex;
|
|
private readonly string _replacement;
|
|
|
|
public Rule(string pattern, string replacement)
|
|
{
|
|
_regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
_replacement = replacement;
|
|
}
|
|
|
|
public string Apply(string word)
|
|
{
|
|
if (!_regex.IsMatch(word))
|
|
{
|
|
return null;
|
|
}
|
|
|
|
return _regex.Replace(word, _replacement);
|
|
}
|
|
}
|
|
}
|
|
}
|