I would recommend generalizing the inputs first (basically, stripping out the concrete numbers and names and replacing them with placeholders), and then grouping them by similar format so that you have a set of samples to work with.
For example, `20th, 21st August 1987` then becomes `[number][postfix], [number][postfix] [month] [year]` (provided that `<number><st|th|rd|nd>` is recognized as a number plus postfix, the months are obvious, and the years are four-digit numbers).
, , , . , , , , ( , , , (#th[, $th[, ...]]) .)
, , , ( , ). , , :
(.*?)([0-9]{4})(?:, |$)
(.*?)(January|February|...)(?:, |$)
Then, within each month segment, match the individual days:
(?:([0-9]{1,2})(?:st|nd|rd|th)(?:, )?)*(?:, |$)
. , , . , , .
, . , , , - , . , , . , PHP :
, . .
<?php
/**
 * Expands a human-written list of ordinal dates into individual dates.
 *
 * The input is peeled apart in three passes:
 *   1. split into segments that each end in a four-digit year,
 *   2. split every year segment into "<days> <month>" groups,
 *   3. pull the individual day numbers out of every group.
 *
 * Example: '31st May, 1st June 1909' => array('5-31-1909', '6-1-1909').
 *
 * @param string $sample Text such as '10th, 11th, 12th May 1954'.
 * @param array  $months Month names in calendar order (January first);
 *                       matching is case-insensitive. The names are used
 *                       verbatim inside a regular expression, so they must
 *                       not contain regex metacharacters.
 *
 * @return array List of "month-day-year" strings (no zero padding), in the
 *               order the dates appear in $sample. Empty when nothing matched.
 */
function parse_date_list($sample, array $months)
{
    $dates = array();

    // Normalise to a zero-based, lowercase list so that the position of a
    // name is always (month number - 1), whatever keys/case the caller used.
    $names = array_map('strtolower', array_values($months));
    if (count($names) === 0) {
        // An empty alternation would match the empty string everywhere.
        return $dates;
    }
    $monthAlternation = implode('|', $names);

    // Pass 1: everything up to (and including) a four-digit year that is
    // followed by a comma or the end of the string is one year segment.
    $yearPattern = '/(?:^|\s)(?<month>.*?)\s?(?<year>[0-9]{4})(?:$|,)/';
    if (!preg_match_all($yearPattern, $sample, $yearSegments, PREG_SET_ORDER)) {
        return $dates;
    }

    // Pass 2: a run of ordinal days followed by a month name. The month may
    // be followed by a comma OR the end of the segment; anchoring on the end
    // only would keep just the last month of segments like
    // '31st May, 1st June' and silently drop the earlier ones.
    $monthPattern = '/(?<days>(?:(?:^|\s)[0-9]{1,2}(?:st|nd|rd|th),?)*)\s?(?<month>'
        . $monthAlternation . ')(?:,|$)/i';

    // Pass 3: each ordinal day inside a day run.
    $dayPattern = '/(?:^|\s)(?<day>[0-9]{1,2})(?:st|nd|rd|th)(?:,|$)/i';

    foreach ($yearSegments as $yearSegment) {
        if (!preg_match_all($monthPattern, $yearSegment['month'], $monthGroups, PREG_SET_ORDER)) {
            continue;
        }

        foreach ($monthGroups as $monthGroup) {
            // The regex only matches names taken from $names, so the strict
            // search always succeeds here.
            $monthNumber = array_search(strtolower($monthGroup['month']), $names, true) + 1;

            if (!preg_match_all($dayPattern, $monthGroup['days'], $dayMatches)) {
                continue;
            }

            foreach ($dayMatches['day'] as $day) {
                $dates[] = sprintf('%d-%d-%d', $monthNumber, $day, $yearSegment['year']);
            }
        }
    }

    return $dates;
}

// Sample inputs covering single days, several days in one month, several
// months within one year, and a list that crosses a year boundary.
$samples = array(
    '20th, 21st August 1897',
    '31st May, 1st June 1909',
    '29th January 2007',
    '10th, 11th, 12th May 1954',
    '26th, 27th, 28th, 29th, 30th March 2006',
    '27th, 28th, 29th, 30th November, 1st December 2006',
    '30th, 31st, December 2010, 1st, 2nd January 2011'
);

// Calendar order matters: a name's index + 1 is its month number.
$months = array('january','february','march','april','may','june','july','august','september','october','november','december');

// Render every sample followed by the list of dates extracted from it.
foreach ($samples as $sample) {
    $dates = parse_date_list($sample, $months);

    echo "<p><b>{$sample}</b> was parsed to include:</p><ul>\r\n";
    foreach ($dates as $date) {
        echo "<li>{$date}</li>\r\n";
    }
    echo "</ul>\r\n";
}
?>
20, 21 1897 . :
31 , 1 1909 :
29th January 2007 was parsed to include:
1-29-2007
10th, 11th, 12th May 1954 was parsed to include:
- 5-10-1954
- 5-11-1954
- 5-12-1954
26th, 27th, 28th, 29th, 30th March 2006 was parsed to include:
3-26-2006
3-27-2006
3-28-2006
3-29-2006
3-30-2006
27, 28, 29, 30 , 1 2006 . , :
30th, 31st, December 2010, 1st, 2nd January 2011 was parsed to include:
- 12-30-2010
- 12-31-2010
- 1-1-2011
- 1-2-2011
, , http://www.ideone.com/GGMaH