Java: how to compare two lines to get the parts where they differ?

I would like to know a way to get parts where the two lines are different.

Suppose I have these two lines:

String s1 = "x4.printString(\"Bianca.()\").y1();";
String s2 = "sb.printString(\"Bianca.()\").length();";

I need this result: ["x4", "y1", "sb", "length"], returned by a method that receives s1 and s2 as arguments.

I searched for something similar in other posts, but I only found references to StringUtils.difference(String first, String second).

But this method returns the remainder of the second string, starting from the index where it begins to differ from the first.
I really don't know where to start, and any advice would be greatly appreciated.

UPDATE: Following the recommendations of @aUserHimself, I managed to get the common subsequences between the two strings, but they come out merged into a single String.

This is my code now:

/**
 * Builds the longest-common-subsequence length table for two strings.
 *
 * <p>Cell {@code [i][j]} holds the LCS length of the first {@code i} characters of
 * {@code s} and the first {@code j} characters of {@code t}; row 0 and column 0 are
 * all zero.
 *
 * @param s the first string
 * @param t the second string
 * @return a table of size {@code (s.length() + 1) x (t.length() + 1)}
 */
private static int[][] lcs(String s, String t) {
    final int rows = s.length() + 1;
    final int cols = t.length() + 1;
    // A freshly allocated int array is already zero-filled, which is the base case.
    final int[][] table = new int[rows][cols];
    for (int i = 1; i < rows; i++) {
        final char fromS = s.charAt(i - 1);
        for (int j = 1; j < cols; j++) {
            if (fromS == t.charAt(j - 1)) {
                // Matching characters extend the subsequence of both shorter prefixes.
                table[i][j] = table[i - 1][j - 1] + 1;
            } else {
                table[i][j] = Math.max(table[i][j - 1], table[i - 1][j]);
            }
        }
    }
    return table;
}

/**
 * Recovers one longest common subsequence of the prefixes {@code s[0..m)} and
 * {@code t[0..n)} by walking the LCS length table backwards from cell {@code (m, n)}.
 *
 * <p>Despite the name, exactly one subsequence is produced: when the characters differ
 * the walk follows a single neighbour (left on a tie), so the returned list always has
 * exactly one element. The matched characters are joined into one string; the gaps
 * between them are not recorded.
 *
 * <p>The walk is iterative. A recursive formulation needs one stack frame per step
 * (up to {@code m + n}) and fails with {@code StackOverflowError} on long inputs.
 *
 * @param table LCS length table for {@code s} and {@code t}, as built by {@code lcs}
 * @param s     the first string
 * @param t     the second string
 * @param m     number of leading characters of {@code s} to consider
 * @param n     number of leading characters of {@code t} to consider
 * @return a mutable single-element list holding the recovered subsequence
 *         (the empty string when {@code m} or {@code n} is 0)
 */
private static List<String> backTrackAll(int[][]table, String s, String t, int m, int n){
    // Every match consumes one character from both prefixes, so min(m, n) bounds the length.
    char[] matched = new char[Math.max(0, Math.min(m, n))];
    int writePos = matched.length;
    int i = m;
    int j = n;
    while (i != 0 && j != 0) {
        char candidate = s.charAt(i - 1);
        if (candidate == t.charAt(j - 1)) {
            // The walk runs back to front, so fill the buffer from its end.
            matched[--writePos] = candidate;
            i--;
            j--;
        } else if (table[i][j - 1] >= table[i - 1][j]) {
            j--;
        } else {
            i--;
        }
    }
    List<String> result = new ArrayList<>();
    result.add(new String(matched, writePos, matched.length - writePos));
    return result;
}

/**
 * Computes the LCS length table for {@code s} and {@code t} and backtracks through it
 * from the bottom-right cell.
 *
 * @param s the first string
 * @param t the second string
 * @return whatever {@code backTrackAll} yields for the full lengths of both strings
 */
private List<String> getAllSubsequences(String s, String t){
    final int[][] lengths = lcs(s, t);
    return backTrackAll(lengths, s, t, s.length(), t.length());
}



Calling getAllSubsequences for these two lines:

String s1 = "while (x1 < 5)"
String s2 = "while (j < 5)"

I get this result: ["while ( < 5)"], not ["while (", " < 5)"], which is what I would like to receive. I do not understand what I am doing wrong.

+4
source share
4 answers

Find the longest common subsequence between the two strings. After that, you can use indexOf to get the index of this common part in both strings and extract the differing parts from each.

example:

CICROSOFK
WOCROSFGT

Common letters

CROS

0 SOFT index+'SOFT'.length str.length

+1

, Longest Common Subsequence 2 .

, , placeholder, LCS , , . , , placeholder .

UPDATE 1: , ( ), . ( ) memoization, .

UPDATE 2: ( ), :

/**
 * Finds the longest run of consecutive characters shared by two character arrays
 * (a longest common substring) with a dynamic-programming table, printing the
 * intermediate tables to standard output.
 *
 * <p>{@code main} applies the search repeatedly, replacing every found run with the
 * separator character, so that the text left between separators is what differs.
 */
public class LongestCommonSequence {

    /** Stands in for already-extracted common runs; it never counts as a match. */
    private static final Character SEPARATOR = '|';

    private final char[] firstStr;
    private final char[] secondStr;
    /** LCS[i][j] is the length of the common run ending at firstStr[i-1] / secondStr[j-1]. */
    private int[][] LCS;
    /** Printable companion of the table; only used for the debug dump. */
    private String[][] solution;
    /** Longest run seen so far and the table cell where it ends. */
    private int max = -1, maxI = -1, maxJ = -1;

    /**
     * @param firstStr  characters of the first text
     * @param secondStr characters of the second text
     */
    public LongestCommonSequence(char[] firstStr, char[] secondStr) {
        this.firstStr = firstStr;
        this.secondStr = secondStr;
        final int rows = firstStr.length + 1;
        final int cols = secondStr.length + 1;
        LCS = new int[rows][cols];
        solution = new String[rows][cols];
    }

    /**
     * Fills the table, dumps it to standard output and returns the longest common run.
     *
     * @return the longest common run, or the empty string if there is none
     */
    public String find() {
        labelBorders();
        fillTable();
        dumpTables();
        return printSolution(maxI, maxJ);
    }

    /** Writes the header row and column of the printable table. */
    private void labelBorders() {
        // Row 0 and column 0 of LCS are never written elsewhere, so they stay at 0.
        for (int col = 1; col <= secondStr.length; col++) {
            solution[0][col] = "   " + secondStr[col - 1];
        }
        for (int row = 1; row <= firstStr.length; row++) {
            solution[row][0] = "   " + firstStr[row - 1];
        }
        solution[0][0] = "NONE";
    }

    /** Computes every run length and remembers the first cell holding the maximum. */
    private void fillTable() {
        for (int i = 1; i <= firstStr.length; i++) {
            final char current = firstStr[i - 1];
            for (int j = 1; j <= secondStr.length; j++) {
                final boolean extendsRun = current == secondStr[j - 1] && current != SEPARATOR;
                LCS[i][j] = extendsRun ? LCS[i - 1][j - 1] + 1 : 0;
                solution[i][j] = extendsRun ? "diag" : "none";
                if (LCS[i][j] > max) {
                    max = LCS[i][j];
                    maxI = i;
                    maxJ = j;
                }
            }
        }
    }

    /** Prints both tables followed by the position of the maximum. */
    private void dumpTables() {
        System.out.println("Path values:");
        for (int[] row : LCS) {
            final StringBuilder line = new StringBuilder();
            for (int value : row) {
                line.append(' ').append(value);
            }
            System.out.println(line);
        }

        System.out.println();
        System.out.println("Path recovery:");
        for (String[] row : solution) {
            final StringBuilder line = new StringBuilder();
            for (String cell : row) {
                line.append(' ').append(cell);
            }
            System.out.println(line);
        }
        System.out.println();
        System.out.println("max:" + max + " maxI:" + maxI + " maxJ:" + maxJ);
    }

    /**
     * Reads the common run that ends at table cell {@code (i, j)} by following the
     * diagonal upwards until a zero cell or the table border, and prints it.
     *
     * @param i row of the cell where the run ends
     * @param j column of the cell where the run ends
     * @return the characters of the run, in their original order
     */
    public String printSolution(int i, int j) {
        final StringBuilder run = new StringBuilder();
        for (int row = i, col = j; row >= 1 && col >= 1 && LCS[row][col] != 0; row--, col--) {
            // The diagonal is walked backwards, so each character goes in front.
            run.insert(0, firstStr[row - 1]);
        }
        final String answer = run.toString();
        System.out.println("Current max solution: " + answer);
        return answer;
    }

    /**
     * Demo: strips every common run from two sample lines and prints what is left of each.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String firstStr = "x4.printString(\\\"Bianca.()\\\").y1();";
        String secondStr = "sb.printString(\\\"Bianca.()\\\").length();";
        final String separatorText = String.valueOf(SEPARATOR);

        // Keep extracting the longest common run until the texts share nothing else.
        String maxSubstr = new LongestCommonSequence(firstStr.toCharArray(), secondStr.toCharArray()).find();
        while (!maxSubstr.isEmpty()) {
            firstStr = firstStr.replace(maxSubstr, separatorText);
            secondStr = secondStr.replace(maxSubstr, separatorText);
            maxSubstr = new LongestCommonSequence(firstStr.toCharArray(), secondStr.toCharArray()).find();
        }

        System.out.println();
        System.out.println("first:" + firstStr + " second: " + secondStr);

        System.out.println("First array: ");
        // The separator is a regex metacharacter, hence the escaping backslash.
        final String splitPattern = "\\" + separatorText;
        for (String s : firstStr.split(splitPattern)) {
            System.out.println(s);
        }
        System.out.println();
        System.out.println("Second array: ");
        for (String s : secondStr.split(splitPattern)) {
            System.out.println(s);
        }
    }
}
+1

, :

/**
 * Demo: prints the fragments of two sample lines that differ position by position,
 * first those taken from {@code s1}, then those taken from {@code s2}.
 *
 * @param args unused
 */
public static void main(String[] args) throws InterruptedException, FileNotFoundException, ExecutionException {

    final String s1 = "x4.printString(\"Bianca.()\").y1();";
    final String s2 = "sb.printString(\"Bianca.()\").length();";

    // getDifferences only reports its first argument's fragments, so it is called both ways.
    List<String> result = new ArrayList<>(getDifferences(s1, s2));
    result.addAll(getDifferences(s2, s1));

    System.out.println(result);
}

/**
 * Collects the fragments of {@code s1} that differ from {@code s2} when both strings
 * are compared character by character at the same index.
 *
 * <p>Consecutive differing characters are merged into one fragment. If {@code s1} is
 * longer than {@code s2}, its surplus tail is appended to the fragment still open at
 * that point (or forms a fragment of its own).
 *
 * @param s1 the string whose differing fragments are reported
 * @param s2 the string to compare against
 * @return the fragments in order of appearance; if either argument is {@code null},
 *         a single-element list holding the other argument
 */
public static List<String> getDifferences(String s1, String s2){
    if (s1 == null) {
        return Collections.singletonList(s2);
    }
    if (s2 == null) {
        return Collections.singletonList(s1);
    }

    final int shared = Math.min(s1.length(), s2.length());
    final List<String> fragments = new ArrayList<>();
    final StringBuilder run = new StringBuilder(); // the fragment currently being collected
    int pos = 0;
    while (pos < shared) {
        final char own = s1.charAt(pos);
        if (own != s2.charAt(pos)) {
            run.append(own);
        } else if (run.length() > 0) {
            // An equal character closes the open fragment.
            fragments.add(run.toString());
            run.setLength(0);
        }
        pos++;
    }
    if (s1.length() > shared) {
        run.append(s1, shared, s1.length()); // surplus tail of s1
    }
    if (run.length() > 0) {
        fragments.add(run.toString());
    }
    return fragments;
}

, , , ( ).

0

This is the solution I found thanks to this link posted by @aUserHimself.

/**
 * Computes the table of longest-common-subsequence lengths for every pair of prefixes
 * of {@code s} and {@code t}.
 *
 * @param s the first string
 * @param t the second string
 * @return a {@code (s.length() + 1) x (t.length() + 1)} table whose cell {@code [i][j]}
 *         is the LCS length of the first {@code i} characters of {@code s} and the
 *         first {@code j} characters of {@code t}
 */
private static int[][] lcs(String s, String t) {
    // New int arrays start out zeroed, so the empty-prefix row and column need no setup.
    int[][] table = new int[s.length() + 1][t.length() + 1];
    for (int i = 1; i <= s.length(); i++) {
        int[] above = table[i - 1];
        int[] current = table[i];
        for (int j = 1; j <= t.length(); j++) {
            current[j] = s.charAt(i - 1) == t.charAt(j - 1)
                    ? above[j - 1] + 1
                    : Math.max(current[j - 1], above[j]);
        }
    }
    return table;
}

/**
 * Walks the LCS length table backwards from cell {@code (i, j)} and collects the
 * characters of {@code s} and {@code t} that are not part of the common subsequence.
 *
 * <p>Characters found only in {@code s} go to {@code diffs.get(0)}, characters found only
 * in {@code t} go to {@code diffs.get(1)}. Adjacent characters of the same string are
 * merged into one fragment. Because the walk runs from the end of the strings towards
 * the start, fragments are appended in reverse order of appearance (the last differing
 * fragment comes first), while the characters inside each fragment keep their order.
 *
 * <p>The walk is a loop rather than a chain of tail calls: Java does not eliminate tail
 * calls, so the recursive form needs up to {@code i + j} stack frames and fails with
 * {@code StackOverflowError} on long inputs.
 *
 * @param table  LCS length table for {@code s} and {@code t}, as built by {@code lcs}
 * @param s      the first string
 * @param t      the second string
 * @param i      number of leading characters of {@code s} still to process
 * @param j      number of leading characters of {@code t} still to process
 * @param indexS position in {@code s} at which the last {@code s} fragment currently
 *               starts; pass 0 when no fragment is open
 * @param indexT the same for {@code t}
 * @param diffs  two-element list (fragments of {@code s}, fragments of {@code t}) that
 *               is filled in place
 * @return the same {@code diffs} instance
 */
private static List<List<String>> getDiffs(int[][] table, String s, String t, int i, int j,
                                           int indexS, int indexT, List<List<String>> diffs){
    List<String> sList = diffs.get(0);
    List<String> tList = diffs.get(1);
    while (i > 0 || j > 0) {
        if (i > 0 && j > 0 && s.charAt(i - 1) == t.charAt(j - 1)) {
            // Common character: part of the subsequence, nothing to record.
            i--;
            j--;
        } else if (i > 0 && (j == 0 || table[i][j - 1] < table[i - 1][j])) {
            String ch = String.valueOf(s.charAt(i - 1));
            if (i == indexS) {
                // Directly precedes the fragment recorded last: extend it at the front.
                int last = sList.size() - 1;
                sList.set(last, ch + sList.get(last));
            } else {
                sList.add(ch);
            }
            i--;
            indexS = i;
        } else if (j > 0 && (i == 0 || table[i][j - 1] >= table[i - 1][j])) {
            String ch = String.valueOf(t.charAt(j - 1));
            if (j == indexT) {
                int last = tList.size() - 1;
                tList.set(last, ch + tList.get(last));
            } else {
                tList.add(ch);
            }
            j--;
            indexT = j;
        } else {
            // Not reachable with non-negative indices; mirrors the fall-through of the
            // original branch structure.
            break;
        }
    }
    return diffs;
}

/**
 * Entry point for the diff: builds the LCS table for {@code s} and {@code t} and hands
 * it to {@code getDiffs} together with two empty result lists.
 *
 * @param s the first string
 * @param t the second string
 * @return a two-element list: index 0 receives the fragments taken from {@code s},
 *         index 1 those taken from {@code t}
 */
private static List<List<String>> getAllDiffs(String s, String t){
    List<List<String>> diffs = new ArrayList<List<String>>();
    diffs.add(new ArrayList<String>());
    diffs.add(new ArrayList<String>());
    int[][] table = lcs(s, t);
    return getDiffs(table, s, t, s.length(), t.length(), 0, 0, diffs);
}

I am posting it because it might be useful to someone.

0
source

Source: https://habr.com/ru/post/1673495/


All Articles