C# and Regex: how to extract strings between quotes
Suppose I have the following line:
<script language = "javascript">
var league = new Array (
"Soccer", "Germany - 2. Bundesliga", "38542195", "102", "24 May 2009 14:00", "24 May 2009 14:00", "1X2", "1", "0"
);
var matches = new Array (
"125", "1.FC Nurnberg - TSV 1860 Munich", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.40 | 4.10 | 6.40 | -", "|| || "," 1 | 1.90 | 3.50 | 2.20 | 0: 1 "," 1 | 1.05 | 2.20 | 1.18 | - "," 1 | 2.00 || 1.60 | 2.5 "," 1 | 3.40 | 3.20 | 1.60 | 2 "," 1 | 1.70 | 2.50 | 5.50 | - "," |||| - "," 1 ",
"126", "FC Ingolstadt 04 - TuS Koblenz", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 3.60 | 2.80 | 2.00 | -", "|||| "," ||||: "," 1 | 1.68 | 1.25 | 1.26 | - "," 1 | 1.90 || 1.70 | 2.5 "," 1 | 3.10 | 3.10 | 1.70 | 2 "," 1 | 3.60 | 2.10 | 2.45 | - "," |||| - "," 1 ",
"127", "FC St. Pauli 1910 - FSV Frankfurt", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 2.50 | 2.95 | 2.60 | -", "|| || "," ||||: "," 1 | 1.41 | 1.44 | 1.28 | - "," 1 | 2.00 || 1.60 | 2.5 "," 1 | 3.40 | 3.20 | 1.60 | 2 "," 1 | 2.95 | 2.00 | 3.05 | - "," |||| - "," 1 ",
"128", "MSV Duisburg - VfL Osnabruck", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 2.30 | 3.60 | 2.40 | -", "||||" , "||||:", "1 | 1.35 | 1.51 | 1.27 | -", "1 | 2.10 || 1.55 | 2.5", "1 | 3.60 | 3.20 | 1.55 | 2", "|||| - "," |||| - "," 1 ",
"129", "FSV Mainz 05 - SC Rot-Weiss Oberhausen", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.40 | 3.80 | 7.00 | -", "| ||| "," 1 | 1.95 | 3.50 | 2.50 | 0: 1 "," 1 | 1.05 | 2.50 | 1.18 | - "," 1 | 2.00 || 1.60 | 2.5 "," 1 | 3.40 | 3.20 | 1.60 | 2 "," 1 | 1.70 | 2.30 | 5.50 | - "," |||| - "," 1 ",
"130", "Rot-Weiss Ahlen - SpVgg Greuther Furth", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 2.55 | 3.20 | 2.55 | -", "|| || "," ||||: "," 1 | 1.42 | 1.42 | 1.28 | - "," 1 | 2.10 || 1.55 | 2.5 "," 1 | 3.60 | 3.20 | 1.55 | 2 "," 1 | 3.00 | 2.00 | 3.00 | - "," |||| - "," 1 ",
"131", "SC Freiburg - 1.FC Kaiserslautern", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.75 | 3.25 | 4.20 | -", "||| | "," ||||: "," 1 | 1.17 | 1.91 | 1.24 | - "," 1 | 2.10 || 1.55 | 2.5 "," 1 | 3.60 | 3.20 | 1.55 | 2 "," 1 | 2.30 | 2.10 | 3.80 | - "," |||| - "," 1 ",
"132", "SV Wehen Wiesbaden - FC Hansa Rostock", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 5.00 | 3.70 | 1.55 | -", "||| | "," ||||: "," 1 | 2.23 | 1.09 | 1.23 | - "," 1 | 1.90 || 1.70 | 2.5 "," 1 | 3.10 | 3.10 | 1.70 | 2 "," 1 | 4.50 | 2.25 | 2.00 | - "," |||| - "," 1 ",
"133", "TSV Alemannia Aachen - FC Augsburg", "24 May 2009 14:00", "Sun, 05.24.09 14:00", "1 | 1.60 | 3.45 | 5.10 | -", "|||| "," ||||: "," 1 | 1.11 | 2.13 | 1.23 | - "," 1 | 2.10 || 1.55 | 2.5 "," 1 | 3.60 | 3.20 | 1.55 | 2 "," 1 | 2.10 | 2.20 | 4.30 | - "," |||| - "," 1 "
);
var events = showLeague (league, matches);
hasEvents = hasEvents + events;
</script>
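I would like to extract every string between the quotes, but only from the "var matches" array, so that I end up with something like this: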
(0): 125 (1): 1.FC Nurnberg - TSV 1860 Munich (2): 24 May 2009 14:00 etc.
NB: , . !
, , CSV . .
: codeproject.com: CSV-. , CSV .
OLEDB: # - OLEDB CSV.
Use IndexOf() to locate the text between "var matches = new Array(" and ");", then parse that text as CSV.
:
using System.Text.RegularExpressions;
/// <summary>
/// Runs <paramref name="pattern"/> against <paramref name="input"/> and
/// returns every match found.
/// </summary>
/// <param name="input">Text to search.</param>
/// <param name="pattern">Regular expression, used with default options.</param>
/// <returns>The collection of matches produced by the pattern.</returns>
public static MatchCollection getMatches(String input, String pattern)
{
    // Build the regex and query it in one expression; no options are applied.
    return new Regex(pattern).Matches(input);
}
/// <summary>
/// Demonstrates a two-step extraction: first isolate the argument list of
/// <c>var matches = new Array(...)</c>, then pull out each quoted value,
/// collecting it and echoing it to the console.
/// </summary>
/// <remarks>
/// Reads its input from <c>RandomTest</c>, which is defined outside this
/// block (presumably the script text to parse - confirm against its
/// declaration). Indexing the first match throws when the input contains
/// no matching declaration.
/// </remarks>
public static void Example()
{
    // Step 1: capture everything between "new Array(" and the first ")".
    String arrayPattern = "var matches = new Array\\(([^\\)]+)\\)";
    String arrayBody = getMatches(RandomTest, arrayPattern)[0].Groups[1].Value;

    // Step 2: every non-empty run of characters enclosed in double quotes.
    String quotedPattern = "\"([^\"]+)\"";
    List<String> values = new List<String>();
    MatchCollection quoted = getMatches(arrayBody, quotedPattern);

    for (int i = 0; i < quoted.Count; i++)
    {
        // Group 1 holds the text between the quotes.
        String text = quoted[i].Groups[1].Value;
        values.Add(text);
        Console.WriteLine(text);
    }
}
,
, :
/// <summary>
/// Extracts every double-quoted value from the JavaScript
/// <c>var matches = new Array( ... );</c> declaration found in
/// <paramref name="inputString"/>.
/// </summary>
/// <param name="inputString">Text containing the declaration.</param>
/// <returns>
/// Returns all values inside matches array in a single list, in source
/// order. Empty quoted strings are returned as empty values. The list is
/// empty when the declaration is not found.
/// </returns>
/// <remarks>
/// The declaration is matched case-insensitively and ends at the first
/// <c>);</c>. A value that itself contains <c>)</c> prevents the
/// declaration from being matched.
/// </remarks>
public static List<string> GetMatchesArray(String inputString)
{
    // Matches var matches = new Array( ... );
    // "\s*" tolerates whitespace between "Array" and "(", which JavaScript permits.
    // The static Regex.Match overload reuses the framework's regex cache, so the
    // pattern is not rebuilt on every call.
    Match arrayMatch = Regex.Match(
        inputString,
        "var matches = new Array\\s*\\([^\\)]*\\);",
        RegexOptions.IgnoreCase);

    List<string> quotedList = new List<string>();
    if (!arrayMatch.Success)
    {
        return quotedList;
    }

    // Matches all the data between the quotes inside var matches.
    // "*" (not "+") so that "" is captured as an empty value; with "+" an empty
    // string made the scan pair up the wrong quotes and capture the separator.
    for (Match m = Regex.Match(arrayMatch.Value, "\"([^\"]*)\""); m.Success; m = m.NextMatch())
    {
        quotedList.Add(m.Groups[1].Value);
    }

    return quotedList;
}
, , . :
/// <summary>
/// Extracts the double-quoted values of the JavaScript
/// <c>var matches = new Array( ... );</c> declaration found in
/// <paramref name="inputString"/>, grouped by source line, so the data can
/// be organized per line (similar to a 2D array).
/// </summary>
/// <param name="inputString">Text containing the declaration.</param>
/// <returns>
/// Returns all the quoted text per line in a list of lines: one inner list
/// for each line of the array body that holds at least one quoted value.
/// Lines without quoted values (blank lines, the closing <c>);</c>) produce
/// no entry. The result is empty when the declaration is not found.
/// </returns>
/// <remarks>
/// The declaration is matched case-insensitively and ends at the first
/// <c>);</c>. A value that itself contains <c>)</c> prevents the
/// declaration from being matched.
/// </remarks>
public static List<List<string>> GetMatchesArrayPerLine(String inputString)
{
    // Matches var matches = new Array( ... ); and captures only the text
    // between the parentheses. "\s*" tolerates whitespace before "(".
    // The static Regex.Match overload reuses the framework's regex cache.
    Match arrayMatch = Regex.Match(
        inputString,
        "var matches = new Array\\s*\\(([^\\)]*)\\);",
        RegexOptions.IgnoreCase);

    // quotedListPerLine[0] = List<string> containing { "125", "1.FC Nurnberg - TSV 1860 Munich", ... }
    // quotedListPerLine[1] = List<string> containing { "126", "FC Ingolstadt 04 - TuS Koblenz", ... }
    List<List<string>> quotedListPerLine = new List<List<string>>();
    if (!arrayMatch.Success)
    {
        return quotedListPerLine;
    }

    // Splitting the captured body (instead of matching "\n(.*)") also keeps
    // values written on the same line as "new Array(".
    foreach (string line in arrayMatch.Groups[1].Value.Split('\n'))
    {
        List<string> quotedList = new List<string>();

        // "*" (not "+") so that "" is captured as an empty value rather than
        // shifting the scan onto the separator between two values.
        for (Match m = Regex.Match(line, "\"([^\"]*)\""); m.Success; m = m.NextMatch())
        {
            quotedList.Add(m.Groups[1].Value);
        }

        // Skip lines that carry no data so callers never see empty rows.
        if (quotedList.Count > 0)
        {
            quotedListPerLine.Add(quotedList);
        }
    }

    return quotedListPerLine;
}
:
// Usage: parse the text held in `a` (defined outside this snippet -
// presumably the script source; confirm) and print each row's values
// under a separator line.
List<List<string>> quotedListLines = MyRegEx.GetMatchesArrayPerLine(a);
for (int rowIndex = 0; rowIndex < quotedListLines.Count; rowIndex++)
{
    Console.WriteLine("----LINE---");

    List<string> row = quotedListLines[rowIndex];
    for (int valueIndex = 0; valueIndex < row.Count; valueIndex++)
    {
        Console.WriteLine(row[valueIndex]);
    }
}