An interesting piece of code: it extracts the picture (image) information from all of the HTML content. When we have the time, we can look into the problem and improve the process.
// Matches one complete <img ...> tag. [^>]* is used instead of .*? so that a
// tag broken across lines by a bare '\n' is still recognised.
private static readonly Regex s_imgTagRegex =
    new Regex(@"<img[^>]*>", RegexOptions.IgnoreCase);

// Matches an image file name: a run of word characters / parentheses followed by
// a literal dot and a known image extension. Because '/' and quote characters are
// not part of the name class, a match never includes the directory part of a path
// or the attribute's opening quote, so no trimming of the result is needed.
private static readonly Regex s_imageFileRegex =
    new Regex(@"[\w()]*\.(?:jpg|bmp|gif|png|jpeg)", RegexOptions.IgnoreCase);

/// <summary>
/// Extracts the image file names referenced by the <c>&lt;img&gt;</c> tags of an
/// HTML fragment.
/// </summary>
/// <param name="HtmlContent">HTML text to scan. May be null or empty.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per <c>&lt;img&gt;</c> tag that
/// contains a recognisable image file name (jpg, bmp, gif, png or jpeg, matched
/// case-insensitively), in document order. Only the file name is returned, not
/// its directory path. The list is empty when nothing is found.
/// </returns>
protected ArrayList GetAList(string HtmlContent)
{
    ArrayList fileNames = new ArrayList();

    // Nothing to scan: return an empty list instead of failing on a null reference.
    if (string.IsNullOrEmpty(HtmlContent))
    {
        return fileNames;
    }

    // Same normalisation the original intended: join CRLF-wrapped lines and trim.
    string html = HtmlContent.Replace("\r\n", "").Trim();

    foreach (Match tag in s_imgTagRegex.Matches(html))
    {
        // Only the first file name inside each tag is reported.
        Match file = s_imageFileRegex.Match(tag.Value);
        if (file.Success)
        {
            fileNames.Add(file.Value);
        }
    }

    return fileNames;
}
|