Detect incomplete patterns in strings

I have a string containing nested repeating patterns, for example:

String pattern1 = "1234";
String pattern2 = "5678";
String patternscombined = "1234|1234|5678|9"//added | for reading pleasure
String pattern = (pattern1 + pattern1 + pattern2 + "9")
                +(pattern1 + pattern1 + pattern2 + "9")
                +(pattern1 + pattern1 + pattern2 + "9")
String result = "1234|1234|5678|9|1234|1234|56";

As you can see in the above example, the result has been disabled. But knowing the repeating patterns, you can predict what will happen next.

Now to my question: How can I predict the following repetitions of this pattern to get the resulting string, for example:

String predictedresult = "1234|1234|5678|9|1234|1234|5678|9|1234|1234|5678|9";

The patterns will be less than 10 characters, the predicted result will be less than 1000 characters.

I get only the cutoff result string, and the pattern recognition program is already implemented and working. In the example above, I would have result, pattern1, pattern2and patternscombined.

EDIT:

I found a solution for me:

import java.util.Arrays;


public class LRS {
    // return the longest common prefix of s and t
    public static String lcp(String s, String t) {
        int n = Math.min(s.length(), t.length());
        for (int i = 0; i < n; i++) {
            if (s.charAt(i) != t.charAt(i))
                return s.substring(0, i);
        }
        return s.substring(0, n);
    }

    // return the longest repeated string in s
    public static String lrs(String s) {
        // form the N suffixes
        int N = s.length();
        String[] suffixes = new String[N];
        for (int i = 0; i < N; i++) {
            suffixes[i] = s.substring(i, N);
        }
        // sort them
        Arrays.sort(suffixes);
        // find longest repeated substring by comparing adjacent sorted suffixes
        String lrs = "";
        for (int i = 0; i < N - 1; i++) {
            String x = lcp(suffixes[i], suffixes[i + 1]);
            if (x.length() > lrs.length())
                lrs = x;
        }
        return lrs;
    }

    public static int startingRepeats(final String haystack, final String needle)
    {
        String s = haystack;
        final int len = needle.length();
        if(len == 0){
            return 0;
        }
        int count = 0;

        while (s.startsWith(needle)) {
            count++;
            s = s.substring(len);
        }

        return count;
    }

    public static String lrscutoff(String s){
        String lrs = s;
        int length = s.length();
        for (int i = length; i > 0; i--) {
            String x = lrs(s.substring(0, i));
            if (startingRepeats(s, x) < 10 &&
                    startingRepeats(s, x) > startingRepeats(s, lrs)){
                lrs = x;
            }
        }
        return lrs;
    }

    // read in text, replacing all consecutive whitespace with a single space
    // then compute longest repeated substring
    public static void main(String[] args) {
        long time = System.nanoTime();
        long timemilis = System.currentTimeMillis();
        String s = "12341234567891234123456789123412345";
        String repeat = s;
        while(repeat.length() > 0){
            System.out.println("-------------------------");
            String repeat2 = lrscutoff(repeat);
            System.out.println("'" + repeat + "'");

            int count = startingRepeats(repeat, repeat2);
            String rest = repeat.substring(count*repeat2.length());
            System.out.println("predicted: (rest ='" + rest + "')" );
            while(count > 0){
                System.out.print("'" + repeat2 + "' + ");
                count--;
            }
            if(repeat.equals(repeat2)){
                System.out.println("''");
                break;
            }
            if(rest!="" && repeat2.contains(rest)){
                System.out.println("'" + repeat2 + "'");
            }else{
                System.out.println("'" + rest + "'");
            }

            repeat = repeat2;

        }
        System.out.println("Time: (nano+millis):");
        System.out.println(System.nanoTime()-time);
        System.out.println(System.currentTimeMillis()-timemilis);
    }
}
+4
2

(|) , , HashMap.

1234 = 2
1344 = 1
4411 = 5

, Longest Repeated Substring. , , . , . google .

+1

, - n-gram language model, , , , . , , . ( ), , , ( "" ).

+1

Source: https://habr.com/ru/post/1526783/


All Articles