/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content as text.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The text read from the response stream (no encoding is specified, so
/// StreamReader's default applies), or the literal string "null" when the
/// download or read fails for any reason.
/// </returns>
private string FetchPage(String url)
{
    String page = "null";
    try
    {
        // WebClient, the response stream and the reader are all IDisposable;
        // stacking the using blocks releases every one of them, including
        // when OpenRead or ReadToEnd throws.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream))
        {
            page = reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort: any failure (bad URL, network error,
        // read error) is reported through the "null" sentinel instead of
        // an exception, exactly as before.
        page = "null";
    }
    return page;
}
// Absolute http:// and https:// URLs. Built once and reused by GetLink.
private static readonly Regex AbsoluteLinkPattern = new Regex(
    @"https?://([\w-]+\.)+[\w-]+(/[\w-./?%&=]*)?",
    RegexOptions.IgnoreCase);

// E-mail addresses whose domain ends in one of the listed suffixes.
// The suffix list is an alternation followed by a word boundary: the
// previous bracketed form was a character class, which accepted stray
// '.' and '|' characters and cut some addresses short. The local part
// uses \.\w+ so dotted names such as "first.last" are kept whole.
private static readonly Regex EmailPattern = new Regex(
    @"\w+((-\w+)|(\.\w+))*\@\w+((\.|-)\w+)*\.(com\.cn|com|cn|net|org|cc|uk)\b",
    RegexOptions.IgnoreCase);

/// <summary>
/// Fetches the page at <paramref name="url"/> and collects every absolute
/// http/https link found in its text.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// The matched links, each followed by ";", or the literal string "null"
/// when no link is found.
/// </returns>
public string GetLink(String url)
{
    String content = this.FetchPage(url);
    return JoinMatches(AbsoluteLinkPattern, content);
}

/// <summary>
/// Fetches the page at <paramref name="url"/> and collects every e-mail
/// address found in its text whose domain ends in com, cn, com.cn, net,
/// org, cc or uk.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// The matched addresses, each followed by ";", or the literal string
/// "null" when no address is found.
/// </returns>
public string GetEmailAddr(String url)
{
    String content = this.FetchPage(url);
    return JoinMatches(EmailPattern, content);
}

// Concatenates every match of pattern in content, each terminated by ";";
// yields "null" when there is no match at all.
private static string JoinMatches(Regex pattern, String content)
{
    StringBuilder joined = new StringBuilder();
    foreach (Match match in pattern.Matches(content))
    {
        joined.Append(match.Value);
        joined.Append(";");
    }

    if (joined.Length == 0)
    {
        joined.Append("null");
    }

    return joined.ToString();
}
其中的部分代码要感谢我的朋友樊帆。
|