var league = new Arr...">

C# and Regex: how to extract strings between quotes

Suppose I have the following line:

<script language = "javascript">
        var league = new Array (
            "Soccer", "Germany - 2. Bundesliga", "38542195", "102", "24 May 2009 14:00", "24 May 2009 14:00", "1X2", "1", "0"
        );
        var matches = new Array (
            "125", "1.FC Nurnberg - TSV 1860 Munich", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.40 | 4.10 | 6.40 | -", "|| || "," 1 | 1.90 | 3.50 | 2.20 | 0: 1 "," 1 | 1.05 | 2.20 | 1.18 | - "," 1 | 2.00 || 1.60 | 2.5 "," 1 | 3.40 | 3.20 | 1.60 | 2 "," 1 | 1.70 | 2.50 | 5.50 | - "," |||| - "," 1 ",
            "126", "FC Ingolstadt 04 - TuS Koblenz", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 3.60 | 2.80 | 2.00 | -", "|||| "," ||||: "," 1 | 1.68 | 1.25 | 1.26 | - "," 1 | 1.90 || 1.70 | 2.5 "," 1 | 3.10 | 3.10 | 1.70 | 2 "," 1 | 3.60 | 2.10 | 2.45 | - "," |||| - "," 1 ",
            "127", "FC St. Pauli 1910 - FSV Frankfurt", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 2.50 | 2.95 | 2.60 | -", "|| || "," ||||: "," 1 | 1.41 | 1.44 | 1.28 | - "," 1 | 2.00 || 1.60 | 2.5 "," 1 | 3.40 | 3.20 | 1.60 | 2 "," 1 | 2.95 | 2.00 | 3.05 | - "," |||| - "," 1 ",
            "128", "MSV Duisburg - VfL Osnabruck", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 2.30 | 3.60 | 2.40 | -", "||||" , "||||:", "1 | 1.35 | 1.51 | 1.27 | -", "1 | 2.10 || 1.55 | 2.5", "1 | 3.60 | 3.20 | 1.55 | 2", "|||| - "," |||| - "," 1 ",
            "129", "FSV Mainz 05 - SC Rot-Weiss Oberhausen", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.40 | 3.80 | 7.00 | -", "| ||| "," 1 | 1.95 | 3.50 | 2.50 | 0: 1 "," 1 | 1.05 | 2.50 | 1.18 | - "," 1 | 2.00 || 1.60 | 2.5 "," 1 | 3.40 | 3.20 | 1.60 | 2 "," 1 | 1.70 | 2.30 | 5.50 | - "," |||| - "," 1 ",
            "130", "Rot-Weiss Ahlen - SpVgg Greuther Furth", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 2.55 | 3.20 | 2.55 | -", "|| || "," ||||: "," 1 | 1.42 | 1.42 | 1.28 | - "," 1 | 2.10 || 1.55 | 2.5 "," 1 | 3.60 | 3.20 | 1.55 | 2 "," 1 | 3.00 | 2.00 | 3.00 | - "," |||| - "," 1 ",
            "131", "SC Freiburg - 1.FC Kaiserslautern", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.75 | 3.25 | 4.20 | -", "||| | "," ||||: "," 1 | 1.17 | 1.91 | 1.24 | - "," 1 | 2.10 || 1.55 | 2.5 "," 1 | 3.60 | 3.20 | 1.55 | 2 "," 1 | 2.30 | 2.10 | 3.80 | - "," |||| - "," 1 ",
            "132", "SV Wehen Wiesbaden - FC Hansa Rostock", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 5.00 | 3.70 | 1.55 | -", "||| | "," ||||: "," 1 | 2.23 | 1.09 | 1.23 | - "," 1 | 1.90 || 1.70 | 2.5 "," 1 | 3.10 | 3.10 | 1.70 | 2 "," 1 | 4.50 | 2.25 | 2.00 | - "," |||| - "," 1 ",
            "133", "TSV Alemannia Aachen - FC Augsburg", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.60 | 3.45 | 5.10 | -", "|||| "," ||||: "," 1 | 1.11 | 2.13 | 1.23 | - "," 1 | 2.10 || 1.55 | 2.5 "," 1 | 3.60 | 3.20 | 1.55 | 2 "," 1 | 2.10 | 2.20 | 4.30 | - "," |||| - "," 1 "
        );
        var events = showLeague (league, matches);
        hasEvents = hasEvents + events;
</script>

I would like to extract everything between the quotation marks in the "var matches" array, so that I end up with:

(0): 125
(1): 1.FC Nurnberg - TSV 1860 Munich
(2): 24 May 2009 14:00 
etc.

NB: , . !

+2
6

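The contents of the array are, in effect, CSV data, so a CSV parser may be a better fit than a hand-written regex.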

: codeproject.com: CSV-. , CSV .

OLEDB: # - OLEDB CSV.

Use IndexOf() to locate the text between "var matches = new Array(" and ");", then parse that substring as CSV.

+3

:

using System.Text.RegularExpressions;

/// <summary>
/// Runs <paramref name="pattern"/> against <paramref name="input"/> and returns every match.
/// </summary>
/// <param name="input">The text to search.</param>
/// <param name="pattern">The regular expression pattern to apply.</param>
/// <returns>The collection of all matches found in <paramref name="input"/>.</returns>
public static MatchCollection getMatches(String input, String pattern) {
   MatchCollection found = Regex.Matches(input, pattern);
   return found;
}

/// <summary>
/// Demonstrates a two-step extraction: first isolate the argument list of the
/// <c>var matches = new Array(...)</c> call, then pull out each double-quoted value.
/// Every extracted value is collected in a list and written to the console.
/// </summary>
/// <remarks>
/// Reads its input from <c>RandomTest</c>, which is declared outside this snippet
/// (presumably the page text to parse; confirm against the declaring code).
/// </remarks>
public static void Example() {
   // Group 1 captures everything between "new Array(" and the first ")".
   String pattern1 = "var matches = new Array\\(([^\\)]+)\\)";

   MatchCollection results = getMatches(RandomTest, pattern1);
   if (results.Count == 0) {
      // No array found: indexing results[0] would throw ArgumentOutOfRangeException.
      return;
   }
   String marray = results[0].Groups[1].Value;

   // [^\"]* rather than [^\"]+ so that an empty value ("") is captured as an empty
   // string; with + the regex skips it and pairs its closing quote with the next
   // value's opening quote, yielding the separator text as a bogus value.
   String pattern2 = "\"([^\"]*)\"";
   List<String> values = new List<String>();
   foreach (Match value in getMatches(marray, pattern2)) {
      // Group 1 holds the text between the quotes.
      values.Add(value.Groups[1].Value);
      Console.WriteLine(value.Groups[1].Value);
   }
}

,

+1

Regex :

"var matches = new Array\(\s+(.*?)\s+\)"

... and then call .Split on the captured group to separate the individual values.

+1

, " " ""

// Drop the first and last character of s (the outermost quotes), then split the
// remainder on the "," sequence that separates adjacent quoted values.
// Note: C# member names are case-sensitive - it is Substring and Length.
string[] test = Regex.Split(s.Substring(1, s.Length - 2), "\",\"");
0

, - :

var matches = new Array\(\s*("(?:[^\\"]*|\\.)*"\s*(?:,\s*"(?:[^\\"]*|\\.)*")*)\s*\);

. :

"(?:[^\\"]*|\\.)*"

: . CSV .

0

, :

/// <summary>
/// Extracts every double-quoted value from the first
/// <c>var matches = new Array( ... );</c> statement found in <paramref name="inputString"/>.
/// </summary>
/// <param name="inputString">The text (for example a script block) to search.</param>
/// <returns>
/// All values inside the matches array, in order of appearance, as a single flat list.
/// The list is empty when no such array statement is found.
/// </returns>
public static List<string> GetMatchesArray(String inputString)
{
    // Isolate the array statement. [^\)]* stops at the first ')', so the values
    // themselves are expected not to contain a closing parenthesis.
    Regex arrayRegex = new Regex("var matches = new Array\\([^\\)]*\\);", RegexOptions.IgnoreCase);

    // Value is the empty string when nothing matched, which yields an empty result below.
    string arrayString = arrayRegex.Match(inputString).Value;

    // [^\"]* rather than [^\"]+ so that an empty value ("") is captured as an empty
    // string. With + the regex cannot match "", so it pairs the closing quote with the
    // next value's opening quote and returns the separator text as a bogus value.
    Regex quotedRegex = new Regex("\"([^\"]*)\"");

    List<string> quotedList = new List<string>();
    foreach (Match m in quotedRegex.Matches(arrayString))
    {
        quotedList.Add(m.Groups[1].Value);
    }

    return quotedList;
}

If you would rather keep the values grouped by the line they appear on, use this version instead:

/// <summary>
/// Extracts the double-quoted values from the first
/// <c>var matches = new Array( ... );</c> statement found in <paramref name="inputString"/>,
/// grouped by the source line they appear on (similar to a 2D array).
/// </summary>
/// <param name="inputString">The text (for example a script block) to search.</param>
/// <returns>
/// One inner list per line of the array statement that contains at least one quoted value,
/// e.g. <c>result[0] = { "125", "1.FC Nurnberg - TSV 1860 Munich", ... }</c>.
/// The outer list is empty when no such array statement is found.
/// </returns>
public static List<List<string>> GetMatchesArrayPerLine(String inputString)
{
    // Isolate the array statement. [^\)]* stops at the first ')', so the values
    // themselves are expected not to contain a closing parenthesis.
    Regex arrayRegex = new Regex("var matches = new Array\\([^\\)]*\\);", RegexOptions.IgnoreCase);

    // Value is the empty string when nothing matched, which yields an empty result below.
    string arrayString = arrayRegex.Match(inputString).Value;

    // [^\"]* rather than [^\"]+ so that an empty value ("") is captured as an empty
    // string instead of being skipped and shifting every following value.
    Regex quotedRegex = new Regex("\"([^\"]*)\"");

    List<List<string>> quotedListPerLine = new List<List<string>>();

    // Split on '\n' instead of matching "\n(.*)": that pattern only sees text that
    // follows a newline, so values on the same line as "new Array(" would be lost.
    foreach (string line in arrayString.Split('\n'))
    {
        List<string> quotedList = new List<string>();
        foreach (Match m in quotedRegex.Matches(line))
        {
            quotedList.Add(m.Groups[1].Value);
        }

        // Lines without any quoted value (the "new Array(" header or the closing ");")
        // are not data rows, so they do not produce an empty entry.
        if (quotedList.Count > 0)
        {
            quotedListPerLine.Add(quotedList);
        }
    }

    return quotedListPerLine;
}

Usage:

// Parse the input, then print each row's values beneath a separator line.
var quotedListLines = MyRegEx.GetMatchesArrayPerLine(a);
foreach (var row in quotedListLines)
{
    Console.WriteLine("----LINE---");
    row.ForEach(value => Console.WriteLine(value));
}
0

Source: https://habr.com/ru/post/1720769/


All Articles