using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace FuzzyString
{
    /// <summary>
    /// Combines the individual string comparison metrics into a single averaged
    /// score and a tolerance-based approximate-equality check.
    /// </summary>
    public static partial class ComparisonMetrics
    {
        /// <summary>
        /// Averages every metric selected in <paramref name="options"/> into one score.
        /// Each metric is scaled as a distance, so similarity-style metrics are
        /// inverted (1 - value) before being averaged.
        /// </summary>
        /// <param name="source">The first string to compare.</param>
        /// <param name="target">The second string to compare.</param>
        /// <param name="options">
        /// The metrics to include. Unless <c>CaseSensitive</c> is present, both strings
        /// are passed through <c>Capitalize()</c> before any metric is evaluated.
        /// </param>
        /// <returns>The arithmetic mean of the selected metric results.</returns>
        /// <exception cref="InvalidOperationException">
        /// No metric produced a result: either no metric option was supplied, or the
        /// only one supplied was the Hamming distance and the strings differ in length.
        /// </exception>
        public static double GetFuzzyEqualityScore(this string source, string target, params FuzzyStringComparisonOptions[] options)
        {
            List<double> comparisonResults = CollectComparisonResults(source, target, options);

            // Enumerable.Average would throw the same exception type with a generic
            // "sequence contains no elements" message; fail with an actionable one instead.
            if (comparisonResults.Count == 0)
            {
                throw new InvalidOperationException(
                    "None of the supplied comparison options produced a result for the given strings.");
            }

            return comparisonResults.Average();
        }

        /// <summary>
        /// Decides whether two strings are approximately equal by comparing their
        /// averaged metric score against a threshold chosen by <paramref name="tolerance"/>.
        /// </summary>
        /// <param name="source">The first string to compare.</param>
        /// <param name="target">The second string to compare.</param>
        /// <param name="tolerance">Selects the threshold applied to the score.</param>
        /// <param name="options">The metrics to include in the score.</param>
        /// <returns>
        /// <c>true</c> when the score passes the threshold for the given tolerance;
        /// <c>false</c> otherwise, including when no options are supplied, when no
        /// metric is applicable, or when the tolerance value is not recognised.
        /// </returns>
        public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
        {
            if (options.Length == 0)
            {
                return false;
            }

            List<double> comparisonResults = CollectComparisonResults(source, target, options);

            // Nothing was measurable (e.g. only Hamming requested on strings of
            // different length): treat it like the "no options" case rather than throwing.
            if (comparisonResults.Count == 0)
            {
                return false;
            }

            double score = comparisonResults.Average();

            if (tolerance == FuzzyStringComparisonTolerance.Strong)
            {
                return score < 0.25;
            }

            if (tolerance == FuzzyStringComparisonTolerance.Normal)
            {
                return score < 0.5;
            }

            if (tolerance == FuzzyStringComparisonTolerance.Weak)
            {
                return score < 0.75;
            }

            if (tolerance == FuzzyStringComparisonTolerance.Manual)
            {
                // NOTE(review): this comparison points the opposite way from the other
                // tolerances (greater-than instead of less-than). Kept as-is because
                // callers may rely on it; confirm whether it is intentional.
                return score > 0.6;
            }

            return false;
        }

        /// <summary>
        /// Evaluates each metric selected in <paramref name="options"/> and returns the
        /// individual results, each scaled as a distance.
        /// </summary>
        private static List<double> CollectComparisonResults(string source, string target, FuzzyStringComparisonOptions[] options)
        {
            List<double> comparisonResults = new List<double>();

            if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive))
            {
                // Presumably normalizes letter case so the metrics ignore it; Capitalize()
                // is defined elsewhere in the project.
                source = source.Capitalize();
                target = target.Capitalize();
            }

            // Min: 0    Max: source.Length = target.Length
            // Only evaluated for strings of equal length.
            if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance) && source.Length == target.Length)
            {
                if (target.Length == 0)
                {
                    // Both strings are empty: identical, and dividing by the length would be 0/0.
                    comparisonResults.Add(0.0);
                }
                else
                {
                    // Convert before dividing so an integer distance is not truncated to 0 or 1.
                    comparisonResults.Add(Convert.ToDouble(source.HammingDistance(target)) / target.Length);
                }
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance))
            {
                comparisonResults.Add(source.JaccardDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance))
            {
                comparisonResults.Add(source.JaroDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
            {
                comparisonResults.Add(source.JaroWinklerDistance(target));
            }

            // The normalized variant takes precedence when both Levenshtein options are given.
            // NOTE(review): either divisor can be 0 for empty input, which yields a
            // non-finite result; confirm whether callers can pass empty strings.
            if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
            {
                // Min: 0    Max: LevenshteinDistanceUpperBounds - LevenshteinDistanceLowerBounds
                double range = Convert.ToDouble(
                    Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target));
                comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / range);
            }
            else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance))
            {
                // Min: LevenshteinDistanceLowerBounds    Max: LevenshteinDistanceUpperBounds
                double upperBound = Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target));
                comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / upperBound);
            }

            // Shared length ratio inverted into a distance.
            // NOTE(review): when either string is empty the divisor is 0 and the result
            // is not a finite number; confirm whether callers can pass empty strings.
            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
            {
                double shorterLength = Convert.ToDouble(Math.Min(source.Length, target.Length));
                comparisonResults.Add(1 - source.LongestCommonSubsequence(target).Length / shorterLength);
            }

            // Same scaling and the same empty-string caveat as the subsequence metric.
            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
            {
                double shorterLength = Convert.ToDouble(Math.Min(source.Length, target.Length));
                comparisonResults.Add(1 - source.LongestCommonSubstring(target).Length / shorterLength);
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
            {
                comparisonResults.Add(source.SorensenDiceDistance(target));
            }

            // Min: 0    Max: 1 (inverted into a distance)
            if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient))
            {
                comparisonResults.Add(1 - source.OverlapCoefficient(target));
            }

            // Min: 0    Max: 1 (inverted into a distance)
            if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
            {
                comparisonResults.Add(1 - source.RatcliffObershelpSimilarity(target));
            }

            return comparisonResults;
        }
    }
}