You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
using System;
namespace Apewer.Internals {
/// <summary>Levenshtein 距离:两个字符串之间,由一个转换成另一个所需的最少编辑操作次数。</summary>
internal class Levenshtein {
#region 结果
private double _Rate; private int _Times; private int _Distance; private TimeSpan _Duration;
/// <summary>相似度,取值范围 0 ~ 1。若不同,则趋近于 0;若完全相同,则为 1。</summary>
public double Rate { get { return _Rate; } }
/// <summary>对比次数。</summary>
public int Times { get { return _Times; } }
/// <summary>距离。</summary>
public int Distance { get { return _Distance; } }
/// <summary>时长。</summary>
public TimeSpan Duration { get { return _Duration; } }
#endregion
#region 计算
/// <summary>字符串 1。</summary>
private char[] _ArrChar1;
/// <summary>字符串 2。</summary>
private char[] _ArrChar2;
/// <summary>开始时间。</summary>
private DateTime _BeginTime;
/// <summary>结束时间。</summary>
private DateTime _EndTime;
/// <summary>算法矩阵。</summary>
private int[,] _Matrix;
/// <summary>矩阵列数。</summary>
private int _Column;
/// <summary>矩阵行数。</summary>
private int _Row;
private Levenshtein() { InitVariable(); }
/// <summary>初始化变量。</summary>
private void InitVariable() { _Rate = 0; _Times = 0; _Distance = 0; _Duration = new TimeSpan(0); }
/// <summary>初始化算法基本信息。</summary>
private void InitVariable(string arg1, string arg2) { InitVariable(); _ArrChar1 = arg1.ToCharArray(); _ArrChar2 = arg2.ToCharArray(); _Row = _ArrChar1.Length + 1; _Column = _ArrChar2.Length + 1; _Matrix = new int[_Row, _Column]; }
/// <summary>初始化矩阵的第一行和第一列。</summary>
private void InitMatrix() { for (int i = 0; i < _Column; i++) _Matrix[0, i] = i; for (int i = 0; i < _Row; i++) _Matrix[i, 0] = i; }
/// <summary>取三个数中的最小值。</summary>
private int Minimum(int argFirst, int argSecond, int argThird) { int intMin = argFirst; if (argSecond < intMin) intMin = argSecond; if (argThird < intMin) intMin = argThird; return intMin; }
/// <summary>计算相似度。</summary>
private void Compute() { // 开始时间。
_BeginTime = DateTime.Now;
// 初始化矩阵的第一行和第一列。
this.InitMatrix();
int intCost = 0; for (int i = 1; i < _Row; i++) { for (int j = 1; j < _Column; j++) { if (_ArrChar1[i - 1] == _ArrChar2[j - 1]) { intCost = 0; } else { intCost = 1; } // 关键步骤,计算当前位置值为左边 + 1、上面 + 1、左上角 + intCost 中的最小值。
// 循环遍历到最后 _Matrix[_Row - 1, _Column - 1] 即为两个字符串的距离。
_Matrix[i, j] = this.Minimum(_Matrix[i - 1, j] + 1, _Matrix[i, j - 1] + 1, _Matrix[i - 1, j - 1] + intCost); _Times++; } }
// 结束时间。
_EndTime = DateTime.Now;
// 相似率,移动次数小于最长的字符串长度的 20% 算同一题。
int intLength = _Row > _Column ? _Row : _Column; _Rate = 1 - (double)_Matrix[_Row - 1, _Column - 1] / intLength; _Duration = _EndTime - _BeginTime; _Distance = _Matrix[_Row - 1, _Column - 1]; }
#endregion
#region 方法
/// <summary>计算两个字符串的相似度。</summary>
public static Levenshtein Compute(string arg1, string arg2) { var vString1 = string.IsNullOrEmpty(arg1) ? "" : arg1; var vString2 = string.IsNullOrEmpty(arg2) ? "" : arg2; var vInstance = new Levenshtein(); vInstance.InitVariable(vString1, vString2); vInstance.Compute(); return vInstance; }
#endregion
}
}
|