本文共 4006 字,大约阅读时间需要 13 分钟。
方法一:
// Download a page, decode it as GB2312, strip the markup and print the remaining text.
// StripHTML is not defined in this snippet; it must be supplied by the surrounding code.
// NOTE(review): on .NET Core / .NET 5+ the "gb2312" code page is presumably only available after
// registering CodePagesEncodingProvider - confirm for the target runtime.
WebRequest request = WebRequest.Create("http://www.cftea.com/");

// The using statements release the response and the reader even when reading or
// stripping throws, replacing the manual Close()/Dispose() calls.
using (WebResponse response = request.GetResponse())
using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312")))
{
    var contents = reader.ReadToEnd();
    Console.WriteLine(StripHTML(contents));
}

// Block until input arrives so the console output stays visible.
Console.Read();
方法二:(抓取html中table里面的数据)
// Extract the content between every <table> ... </table> pair of an HTML string using a regular expression.
string html = @"<html>
<head> <title></title> </head> <body> <table>a</table> <table>b</table> <table>c</table> <table>d</table> <table>e</table> </body> </html> ";

// (?is)          : ignore case, and let '.' match newlines as well.
// (?<=<table>)   : look-behind, the match must start right after an opening tag (tag itself excluded).
// .+?            : lazy, so each match stops at the nearest closing tag; needs at least one character.
// (?=</table>)   : look-ahead, the match must be followed by a closing tag (tag itself excluded).
var strReg = @"(?is)(?<=<table>).+?(?=</table>)";

List<string> result = new List<string>();
MatchCollection mc = Regex.Matches(html, strReg);
foreach (Match m in mc)
{
    // Keep each match for later use and echo it to the console.
    result.Add(m.Value);
    Console.WriteLine(m.Value);
}
方法三:
// Parse the HTML held in orgStr (defined outside this snippet) with HtmlDocument and
// print the text of the second <td> of every table row.
var htmlDocument = new HtmlDocument();
htmlDocument.LoadHtml(orgStr);

// XPath: every <table> element in the document.
var tables = htmlDocument.DocumentNode.SelectNodes("//table");

// NOTE(review): HtmlAgilityPack's SelectNodes is documented to return null (not an empty
// collection) when nothing matches, hence the explicit null checks below.
if (tables != null)
{
    foreach (var table in tables)
    {
        // ".//tr" is evaluated relative to the current table; a leading "//" would restart
        // the search at the document root and return the rows of every table.
        var rows = table.SelectNodes(".//tr");
        if (rows == null)
        {
            continue;
        }

        foreach (var tr in rows)
        {
            // Same reasoning: ".//td" only looks inside the current row.
            var cells = tr.SelectNodes(".//td");
            if (cells == null)
            {
                continue;
            }

            // Rows with fewer than two cells (e.g. header rows) are skipped instead of
            // dereferencing the null returned by FirstOrDefault().
            var secondCell = cells.Skip(1).FirstOrDefault();
            if (secondCell == null)
            {
                continue;
            }

            var collegeName = secondCell.InnerText;
            Console.WriteLine(collegeName);
        }
    }
}
相关的网址:
方法四:(对网页中table里面的数据提取)
#region http://www.gaokao.com/e/20120109/4f0a8e1773aa0.shtml
// Disabled sample: loads one of the ranking pages below, selects every row under "//table//tr",
// reads cells td[1]..td[5] and stores each row through db.RankingDescriptions.
// NOTE(review): "/r/n" in the commented Response.Write call looks like a typo for "\r\n", and the
// catch block discards the exception - review both before re-enabling this code.
//var url = "http://www.gaokao.com/e/20120109/4f0a8e1773aa0.shtml";
//var url = "http://www.gaokao.com/e/20120109/4f0a8e1773aa0_2.shtml";
//var url = "http://www.gaokao.com/e/20120109/4f0a8e1773aa0_3.shtml";
//var url = "http://www.gaokao.com/e/20120109/4f0a8e1773aa0_4.shtml";
//var url = "http://www.gaokao.com/e/20120109/4f0a8e1773aa0_5.shtml";
//var url = "http://www.gaokao.com/e/20120109/4f0a8e1773aa0_6.shtml";
//var orgStr = ChinaEduSp.Crawl.HttpUtility.GetContentByUrl(url, "gb2312");
//var htmlDocument = new HtmlDocument();
//htmlDocument.LoadHtml(orgStr);
//var rows = htmlDocument.DocumentNode.SelectNodes("//table//tr");
//foreach (var item in rows)
//{
//    var pos = item.SelectSingleNode("td[1]").InnerText;
//    var school = item.SelectSingleNode("td[2]").InnerText;
//    var province = item.SelectSingleNode("td[3]").InnerText;
//    var type = item.SelectSingleNode("td[4]").InnerText;
//    var totalScore = item.SelectSingleNode("td[5]").InnerText;
//    var seq = db.RankingDescriptions.Count();
//    //Response.Write("名次:" + pos + " 学校名称:" + school + " 所在省份:" + province + " 类型:" + type + " 总分:" + totalScore + "/r/n");
//    //Response.Write("名次:" + pos + " 学校名称:" + school);
//    try
//    {
//        db.RankingDescriptions.Add(new RankingDescription
//        {
//            POS = Convert.ToInt32(pos),
//            SchoolName = school,
//            Province = province,
//            Area = province,
//            Type = type,
//            TotalScore = totalScore,
//            IsShow = true,
//            IsDelete = false,
//            RankId = 0,
//            Seq = seq
//        });
//        db.SaveChanges();
//        seq++;
//    }
//    catch (Exception ex)
//    {
//        string msg = ex.Message;
//    }
//}
#endregion

#region http://www.gaokao.com/e/20120109/4f0a914934baa_2.shtml
// Disabled sample: same approach for a page whose rows sit in a nested table
// ("//table//tr//td//table//tr") and have four cells (no "type" column).
// NOTE(review): the catch block discards the exception - review before re-enabling this code.
//var url = "http://www.gaokao.com/e/20120109/4f0a914934baa_2.shtml";
//var orgStr = ChinaEduSp.Crawl.HttpUtility.GetContentByUrl(url, "gb2312");
//var htmlDocument = new HtmlDocument();
//htmlDocument.LoadHtml(orgStr);
//var rows = htmlDocument.DocumentNode.SelectNodes("//table//tr//td//table//tr");
//foreach (var item in rows)
//{
//    var pos = item.SelectSingleNode("td[1]").InnerText;
//    var school = item.SelectSingleNode("td[2]").InnerText;
//    var province = item.SelectSingleNode("td[3]").InnerText;
//    var totalScore = item.SelectSingleNode("td[4]").InnerText;
//    var seq = db.RankingDescriptions.Count();
//    //Response.Write("名次:" + pos + " 学校名称:" + school + " 所在省份:" + province + " 总分:" + totalScore + "/r/n");
//    //Response.Write("名次:" + pos + " 学校名称:" + school);
//    try
//    {
//        db.RankingDescriptions.Add(new RankingDescription
//        {
//            POS = Convert.ToInt32(pos),
//            SchoolName = school,
//            Province = province,
//            Area = province,
//            TotalScore = totalScore,
//            IsShow = true,
//            IsDelete = false,
//            RankId = 0,
//            Seq = seq
//        });
//        db.SaveChanges();
//        seq++;
//    }
//    catch (Exception ex)
//    {
//        string msg = ex.Message;
//    }
//}
#endregion
转载地址:http://sozmi.baihongyu.com/