Apache Commons Text LevenshteinDistance 构造函数 LevenshteinDistance(final Integer threshold):如果阈值不为 null,则距离计算将被限制在最大长度之内。
如果阈值不为 null,则距离计算将被限制在最大长度之内。
如果阈值为 null,则使用该算法的无限制版本。
LevenshteinDistance() 方法是一个构造函数。
LevenshteinDistance 类的构造函数 LevenshteinDistance() 声明如下:
复制
public LevenshteinDistance(final Integer threshold)
参数
LevenshteinDistance() 方法具有以下参数:
Integer threshold - 阈值;如果为 null,则距离计算不受限制。该值不能为负数。
以下代码演示如何使用 Apache Commons Text 中 LevenshteinDistance 的构造函数 LevenshteinDistance(final Integer threshold)。
例 1
复制
import org.apache.commons.text.*;
import org.apache.commons.text.diff.*;
import org.apache.commons.text.similarity.*;
import org.apache.commons.text.translate.*;import java.util.HashMap;
import java.util.Locale;
import java.util.Map;class ShowVisitor<Character> implements CommandVisitor<Character> {private int inserts = 0;private int keeps = 0;private int deletes = 0;public void visitInsertCommand(Character character) {++inserts;// w w w . d e m o 2 s . c o m System.out.println(String.format("insert %s", character));}public void visitKeepCommand(Character character) {++keeps;System.out.println(String.format("keep %s", character));}public void visitDeleteCommand(Character character) {++deletes;System.out.println(String.format("delete %s", character));}public void printStats() {System.out.println(String.format("%d inserts, %d deletes, %d keeps", inserts, deletes, keeps));}
}public class CommonsTextExamples {public static void main(String[] args) {caseUtilsExample();stringEscapeUtilsExample();stringSubstitutorExample();wordUtilsExample();diffExample();translateExample();similaritiesExample();sentenceSimilarityExample();distancesExample();sentenceDistanceExample();}private static void printExampleHeader(String example) {// Contains an example of TextStringBuilderString header = "Examples of " + example;System.out.println("\n" + header);TextStringBuilder builder = new TextStringBuilder();System.out.println(builder.appendPadding(header.length(), '-').toString());}public static void caseUtilsExample() {printExampleHeader("CaseUtils");String string = "java-programming-language";System.out.println(CaseUtils.toCamelCase(string, true, '-'));System.out.println(CaseUtils.toCamelCase(string, false, '-'));}public static void stringEscapeUtilsExample() {printExampleHeader("StringEscapeUtils");String string = "Department, R&D";System.out.println(StringEscapeUtils.escapeHtml4(string));System.out.println(StringEscapeUtils.escapeXml11(string));System.out.println(StringEscapeUtils.escapeCsv(string));System.out.println(StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4).append("R&D dept: ").escape(string).toString());}public static void stringSubstitutorExample() {printExampleHeader("StringSubstitutor");Map<String, String> substitutions = new HashMap<>();substitutions.put("city", "London");substitutions.put("country", "England");// With static methodSystem.out.println(StringSubstitutor.replace("${city} is the capital of ${country}", substitutions));// With StringSubstitutor objectStringSubstitutor sub = new StringSubstitutor(substitutions);System.out.println(sub.replace("${city} is the capital of ${country}"));StringSubstitutor interpolator = StringSubstitutor.createInterpolator();System.out.println(interpolator.replace("Base64 encoder: ${base64Encoder:Secret password}"));}public static void wordUtilsExample() 
{printExampleHeader("WordUtils");String longString = "This is a very long string, from https://www.example.org";String allLower = "all lower but ONE";String allCapitalized = "All Capitalized But ONE";System.out.println("\nWordUtils: Abbreviation");// Take at least 9 characters, cutting to 12 characters if no space is found beforeSystem.out.println(WordUtils.abbreviate(longString, 9, 12, " ..."));// Take at least 10 characters, cutting to 12 characters if no space is found beforeSystem.out.println(WordUtils.abbreviate(longString, 10, 12, " ..."));// Take at least 10 characters, then cut on the first space wherever it isSystem.out.println(WordUtils.abbreviate(longString, 10, -1, " ..."));System.out.println("\nWordUtils: Initials");System.out.println(WordUtils.initials(allLower));System.out.println(WordUtils.initials(allCapitalized));System.out.println("\nWordUtils: Case change");// Doesn't lowercase the uppercase charactersSystem.out.println(WordUtils.capitalize(allLower));// Lowercases everything, then capitalizes the first letter of each wordSystem.out.println(WordUtils.capitalizeFully(allLower));// Lowercases the first letter of each wordSystem.out.println(WordUtils.uncapitalize(allCapitalized));// Swaps the case of each characterSystem.out.println(WordUtils.swapCase(allLower));System.out.println("\nWordUtils: Wrapping");// Line length is 10, uses '\n' as a line break, does not break words longer than the lineSystem.out.println(WordUtils.wrap(longString, 10, "\n", false) + "\n");// Line length is 10, uses '\n' as a line break, breaks words longer than the lineSystem.out.println(WordUtils.wrap(longString, 10, "\n", true) + "\n");// Line length is 10, uses '\n' as a line break, breaks words longer than the line, also breaks on commasSystem.out.println(WordUtils.wrap(longString, 10, "\n", true, ",") + "\n");}public static void diffExample() {printExampleHeader("diff");String s1 = "hyperspace";String s2 = "cyberscape";StringsComparator comparator = new 
StringsComparator(s1, s2);EditScript<Character> script = comparator.getScript();System.out.println("Longest Common Subsequence length (number of \"keep\" commands): " + script.getLCSLength());System.out.println("Effective modifications (number of \"insert\" and \"delete\" commands): "+ script.getModifications());ShowVisitor<Character> visitor = new ShowVisitor<>();script.visit(visitor);visitor.printStats();}public static void translateExample() {printExampleHeader("translate");Map<CharSequence, CharSequence> translation = new HashMap<>();translation.put("e", "3");translation.put("l", "1");translation.put("t", "7");String s1 = "Let it be!";LookupTranslator lookupTranslator = new LookupTranslator(translation);System.out.println(lookupTranslator.translate(s1));UnicodeEscaper unicodeEscaper = new UnicodeEscaper();UnicodeUnescaper unicodeUnescaper = new UnicodeUnescaper();String unicodeString = unicodeEscaper.translate(s1);System.out.println(unicodeString);System.out.println(unicodeUnescaper.translate(unicodeString));}public static void similaritiesExample() {printExampleHeader("similarities");String s1 = "hyperspace";String s2 = "cyberscape";JaccardSimilarity jaccard = new JaccardSimilarity();System.out.println("Jaccard similarity: " + jaccard.apply(s1, s2));JaroWinklerSimilarity jaroWinkler = new JaroWinklerSimilarity();System.out.println("Jaro-Winkler similarity: " + jaroWinkler.apply(s1, s2));LongestCommonSubsequence lcs = new LongestCommonSubsequence();System.out.println("Longest Common Subsequence similarity: " + lcs.apply(s1, s2));FuzzyScore fuzzyScore = new FuzzyScore(Locale.ENGLISH);System.out.println("Fuzzy score similarity: " + fuzzyScore.fuzzyScore(s1, s2));System.out.println("Fuzzy score similarity: " + fuzzyScore.fuzzyScore(s1, "space"));}public static void sentenceSimilarityExample() {printExampleHeader("sentence similarity");String s1 = "string similarity";String s2 = "string distance";Map<CharSequence, Integer> vector1 = new 
HashMap<>();Map<CharSequence, Integer> vector2 = new HashMap<>();for (String token : s1.split(" ")) {vector1.put(token, vector1.getOrDefault(token, 0) + 1);}for (String token : s2.split(" ")) {vector2.put(token, vector2.getOrDefault(token, 0) + 1);}CosineSimilarity cosine = new CosineSimilarity();System.out.println("Cosine similarity: " + cosine.cosineSimilarity(vector1, vector2));// Adding one repetition of "string" to vector2vector2.put("string", vector2.getOrDefault("string", 0) + 1);System.out.println("Cosine similarity: " + cosine.cosineSimilarity(vector1, vector2));}public static void distancesExample() {printExampleHeader("distances");String s1 = "hyperspace";String s2 = "cyberscape";HammingDistance hamming = new HammingDistance();// Requires the two strings to have the same lengthSystem.out.println("Hamming distance: " + hamming.apply(s1, s2));JaccardDistance jaccard = new JaccardDistance();System.out.println("Jaccard distance: " + jaccard.apply(s1, s2));JaroWinklerDistance jaroWinkler = new JaroWinklerDistance();// The result is wrong at the moment (see https://issues.apache.org/jira/browse/TEXT-104)System.out.println("Jaro-Winkler distance: " + jaroWinkler.apply(s1, s2));LongestCommonSubsequenceDistance lcs = new LongestCommonSubsequenceDistance();System.out.println("Longest Common Subsequence distance: " + lcs.apply(s1, s2));LevenshteinDistance levenshtein = new LevenshteinDistance();System.out.println("Levenshtein distance: " + levenshtein.apply(s1, s2));LevenshteinDistance levenshteinWithThreshold = new LevenshteinDistance(3);// Returns -1 since the actual distance, 4, is higher than the thresholdSystem.out.println("Levenshtein distance: " + levenshteinWithThreshold.apply(s1, s2));LevenshteinDetailedDistance levenshteinDetailed = new LevenshteinDetailedDistance();System.out.println("Levenshtein detailed distance: " + levenshteinDetailed.apply(s1, s2));}public static void sentenceDistanceExample() {printExampleHeader("sentence distance");String s1 = 
"string similarity";String s2 = "string distance";CosineDistance cosine = new CosineDistance();System.out.println("Cosine distance: " + cosine.apply(s1, s2));System.out.println("Cosine distance: " + cosine.apply(s1, s2 + " string"));}
}
上一篇:他俩分手?男方私生活被扒,这瓜有点大 鍒嗘墜涓冨勾鐨勪袱浜烘亱鎯呰鐖嗗厜 鐖嗘枡鎻鏄旀棩鎭╃埍澶鍒嗘墜
下一篇:仅播2集,收视率破2.2,不愧是你们苦苦盼了1年的央视年代黑马剧 开播收视率就爆表的5部电视剧 今年央八收视率破二的剧