You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

177 lines
5.4 KiB

  1. using System.Collections.Generic;
  2. using System.Text.RegularExpressions;
  3. namespace Humanizer.Inflections
  4. {
  /// <summary>
  /// A container for exceptions to simple pluralization/singularization rules.
  /// Vocabularies.Default contains an extensive list of rules for US English.
  /// At this time, multiple vocabularies and removing existing rules are not supported.
  /// </summary>
  /// <remarks>
  /// Rules are tried in reverse order of registration, so a rule added later takes
  /// precedence over every rule that was added before it.
  /// </remarks>
  internal class Vocabulary
  {
      internal Vocabulary()
      {
      }

      private readonly List<Rule> _plurals = new List<Rule>();
      private readonly List<Rule> _singulars = new List<Rule>();

      // Entries are stored lower-cased with the invariant culture so that membership
      // checks do not depend on the current culture (e.g. Turkish dotless 'i').
      private readonly HashSet<string> _uncountables = new HashSet<string>();

      /// <summary>
      /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people".
      /// </summary>
      /// <remarks>
      /// Both words are inserted into regular expression patterns as-is (they are not escaped).
      /// When <paramref name="matchEnding"/> is true the first character of each word is read
      /// separately, so both words must be non-empty.
      /// </remarks>
      /// <param name="singular">The singular form of the irregular word, e.g. "person".</param>
      /// <param name="plural">The plural form of the irregular word, e.g. "people".</param>
      /// <param name="matchEnding">True to match these words on their own as well as at the end of longer words. False, otherwise.</param>
      public void AddIrregular(string singular, string plural, bool matchEnding = true)
      {
          if (matchEnding)
          {
              // Capture the first letter so the replacement ("$1" + rest) keeps the
              // casing of the input word: "Person" -> "People".
              AddPlural($"({singular[0]}){singular.Substring(1)}$", $"$1{plural.Substring(1)}");
              AddSingular($"({plural[0]}){plural.Substring(1)}$", $"$1{singular.Substring(1)}");
          }
          else
          {
              // Anchored at both ends: only the whole word matches.
              AddPlural($"^{singular}$", plural);
              AddSingular($"^{plural}$", singular);
          }
      }

      /// <summary>
      /// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed.
      /// </summary>
      /// <param name="word">Word to be added to the list of uncountables. Matching is case insensitive.</param>
      public void AddUncountable(string word)
      {
          _uncountables.Add(word.ToLowerInvariant());
      }

      /// <summary>
      /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses".
      /// </summary>
      /// <param name="rule">RegEx to be matched, case insensitive, e.g. "(bus)$"</param>
      /// <param name="replacement">RegEx replacement e.g. "$1es"</param>
      public void AddPlural(string rule, string replacement)
      {
          _plurals.Add(new Rule(rule, replacement));
      }

      /// <summary>
      /// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices" -> "vertex/index".
      /// </summary>
      /// <param name="rule">RegEx to be matched, case insensitive, e.g. "(vert|ind)ices$"</param>
      /// <param name="replacement">RegEx replacement e.g. "$1ex"</param>
      public void AddSingular(string rule, string replacement)
      {
          _singulars.Add(new Rule(rule, replacement));
      }

      /// <summary>
      /// Pluralizes the provided input considering irregular words.
      /// </summary>
      /// <param name="word">Word to be pluralized</param>
      /// <param name="inputIsKnownToBeSingular">Normally you call Pluralize on singular words; but if you're unsure call it with false</param>
      /// <returns>
      /// The pluralized word, or <paramref name="word"/> itself when it is uncountable,
      /// is detected as already plural, or no pluralization rule matches it.
      /// </returns>
      public string Pluralize(string word, bool inputIsKnownToBeSingular = true)
      {
          var result = ApplyRules(_plurals, word, false);

          if (inputIsKnownToBeSingular)
          {
              return result ?? word;
          }

          // Plurality is unknown: singularize the input and pluralize it again. If that
          // round trip reproduces the input (and the singular is more than the input
          // minus a trailing "s"), the input is treated as already plural.
          var asSingular = ApplyRules(_singulars, word, false);
          var asSingularAsPlural = ApplyRules(_plurals, asSingular, false);
          if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word)
          {
              return word;
          }

          // Fall back to the input when no rule matched, consistent with Singularize
          // and with the known-singular branch above (previously this returned null).
          return result ?? word;
      }

      /// <summary>
      /// Singularizes the provided input considering irregular words.
      /// </summary>
      /// <param name="word">Word to be singularized</param>
      /// <param name="inputIsKnownToBePlural">Normally you call Singularize on plural words; but if you're unsure call it with false</param>
      /// <param name="skipSimpleWords">Skip singularizing single words that have an 's' on the end</param>
      /// <returns>
      /// The singularized word, or <paramref name="word"/> itself when it is uncountable,
      /// is detected as already singular, or no singularization rule matches it.
      /// </returns>
      public string Singularize(string word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false)
      {
          var result = ApplyRules(_singulars, word, skipSimpleWords);

          if (inputIsKnownToBePlural)
          {
              return result ?? word;
          }

          // the Plurality is unknown so we should check all possibilities
          var asPlural = ApplyRules(_plurals, word, false);
          var asPluralAsSingular = ApplyRules(_singulars, asPlural, false);
          if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word)
          {
              return word;
          }

          return result ?? word;
      }

      /// <summary>
      /// Applies the most recently added rule that matches <paramref name="word"/>.
      /// </summary>
      /// <param name="rules">Rules to try, from the last element down to the first.</param>
      /// <param name="word">Word to transform.</param>
      /// <param name="skipFirstRule">True to leave the rule at index 0 (the first one registered) out of consideration.</param>
      /// <returns>
      /// Null when <paramref name="word"/> is null or when rules were tried and none matched;
      /// <paramref name="word"/> unchanged when it is empty, uncountable, or there are no rules to try;
      /// otherwise the output of the first matching rule.
      /// </returns>
      private string ApplyRules(IList<Rule> rules, string word, bool skipFirstRule)
      {
          if (word == null)
          {
              return null;
          }

          if (word.Length < 1)
          {
              return word;
          }

          if (IsUncountable(word))
          {
              return word;
          }

          var end = skipFirstRule ? 1 : 0;
          if (rules.Count <= end)
          {
              // Nothing to try at all: the word is handed back untouched.
              return word;
          }

          for (var i = rules.Count - 1; i >= end; i--)
          {
              var replaced = rules[i].Apply(word);
              if (replaced != null)
              {
                  return replaced;
              }
          }

          // Rules were tried but none matched; callers substitute the input themselves.
          return null;
      }

      /// <summary>
      /// Checks, ignoring case, whether <paramref name="word"/> was registered through <see cref="AddUncountable"/>.
      /// </summary>
      private bool IsUncountable(string word)
      {
          return _uncountables.Contains(word.ToLowerInvariant());
      }

      /// <summary>
      /// A case-insensitive regular expression paired with its replacement string.
      /// </summary>
      private class Rule
      {
          private readonly Regex _regex;
          private readonly string _replacement;

          public Rule(string pattern, string replacement)
          {
              _regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
              _replacement = replacement;
          }

          /// <summary>
          /// Returns <paramref name="word"/> with every match of the pattern replaced,
          /// or null when the pattern does not match at all.
          /// </summary>
          public string Apply(string word)
          {
              if (!_regex.IsMatch(word))
              {
                  return null;
              }

              return _regex.Replace(word, _replacement);
          }
      }
  }
  155. }