1,获取一级商品分类信息

给定一个网页,获取该网页上商品信息的分类

using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient; namespace catchGoods
{
/// <summary>
/// Main form of the category scraper. A button click starts a worker thread that downloads the
/// http://www.htluxe.com home page, shows each top-level category name in listBox1, and stores
/// the top-level and second-level categories in the exerciseOneSort / exerciseTwoSort tables.
/// </summary>
public partial class Form1 : Form
{
    // NOTE(review): credentials are hard-coded exactly as in the original sample;
    // move them to configuration before using this outside a tutorial.
    private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

    // Root of the site being scraped; relative category links are appended to it.
    private const string SiteRoot = "http://www.htluxe.com";

    public Form1()
    {
        InitializeComponent();
    }

    // Worker thread started by button1_Click.
    public static Thread th;

    /// <summary>
    /// Starts the category scrape on a worker thread so the UI thread is not blocked.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        th = new Thread(GetJDData);
        th.Start();
    }

    /// <summary>
    /// Downloads the home page and, for every ".categroup dl" element, stores the top-level
    /// category (the "h4 a" link) and each of its sub-categories (the "dt p a" links).
    /// Runs on the worker thread.
    /// </summary>
    public void GetJDData()
    {
        HttpUtility http = new HttpUtility();
        string html = http.GetHtmlText(SiteRoot);
        var document = new JumonyParser().Parse(html);

        // Dispose the connection when the scrape ends; the original never closed it.
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        {
            conn.Open();

            foreach (var group in document.Find(".categroup dl"))
            {
                var headerLink = group.FindFirst("h4 a");
                string name = headerLink.InnerText();
                string href = headerLink.Attribute("href").Value();

                // The category key is the text after '=' in the link's href.
                // NOTE(review): the index was lost in the published listing ("Split('=')[]");
                // index 1 is assumed here - confirm against the site's URLs.
                string remark = SecondSegment(href, '=');

                // Controls may only be touched from the thread that created them,
                // so the list box update is marshalled to the UI thread.
                this.Invoke((MethodInvoker)delegate { listBox1.Items.Add(name); });

                InsertTopCategory(conn, name, remark);

                foreach (var subLink in group.Find("dt p a"))
                {
                    string subName = subLink.InnerText();
                    string url = SiteRoot + "/" + subLink.Attribute("href").Value();
                    InsertSubCategory(conn, subName, url, remark);
                }
            }
        }
    }

    // Returns the piece after the first occurrence of separator, or "" when there is none.
    private static string SecondSegment(string text, char separator)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        string[] parts = text.Split(separator);
        return parts.Length > 1 ? parts[1] : string.Empty;
    }

    // Inserts one top-level category row using a parameterized command.
    private static void InsertTopCategory(SqlConnection conn, string className, string remark)
    {
        const string sql = "insert into exerciseOneSort (className, remark) values (@className, @remark)";
        using (SqlCommand cmd = new SqlCommand(sql, conn))
        {
            // Parameters keep quotes in scraped text from breaking (or injecting into) the statement.
            cmd.Parameters.AddWithValue("@className", className ?? string.Empty);
            cmd.Parameters.AddWithValue("@remark", remark ?? string.Empty);
            cmd.ExecuteNonQuery();
        }
    }

    // Inserts one second-level category row; idplus carries the parent category's remark.
    private static void InsertSubCategory(SqlConnection conn, string className, string url, string idplus)
    {
        const string sql = "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)";
        using (SqlCommand cmd = new SqlCommand(sql, conn))
        {
            cmd.Parameters.AddWithValue("@className", className ?? string.Empty);
            cmd.Parameters.AddWithValue("@url", url ?? string.Empty);
            cmd.Parameters.AddWithValue("@idplus", idplus ?? string.Empty);
            cmd.ExecuteNonQuery();
        }
    }
}
}

完整版

using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json; namespace catchGoods
{
/// <summary>
/// Main form of the full scraper.
/// button1 scrapes the category tree of http://www.htluxe.com into exerciseOneSort /
/// exerciseTwoSort; button2 walks every stored sub-category URL, pages through its product
/// list and stores each product in GoodsList. Both jobs run on worker threads and report
/// progress into listBox1.
/// </summary>
public partial class Form1 : Form
{
    // NOTE(review): credentials are hard-coded exactly as in the original sample;
    // move them to configuration before using this outside a tutorial.
    private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

    // Root of the site being scraped; relative links are appended to it.
    private const string SiteRoot = "http://www.htluxe.com";

    public Form1()
    {
        InitializeComponent();
    }

    // Worker thread started by button1_Click.
    public static Thread th;

    /// <summary>
    /// Starts the category scrape on a worker thread so the UI thread is not blocked.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        th = new Thread(GetJDDataOne);
        th.Start();
    }

    /// <summary>
    /// Downloads the home page and, for every ".categroup dl" element, lists and stores the
    /// top-level category (the "h4 a" link) and each of its sub-categories ("dt p a" links).
    /// Runs on the worker thread.
    /// </summary>
    public void GetJDDataOne()
    {
        HttpUtility http = new HttpUtility();
        string html = http.GetHtmlText(SiteRoot);
        var document = new JumonyParser().Parse(html);

        // Dispose the connection when the scrape ends; the original never closed it.
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        {
            conn.Open();

            foreach (var group in document.Find(".categroup dl"))
            {
                var headerLink = group.FindFirst("h4 a");
                string name = headerLink.InnerText();
                string href = headerLink.Attribute("href").Value();

                // The category key is the text after '=' in the link's href.
                // NOTE(review): the index was lost in the published listing ("Split('=')[]");
                // index 1 is assumed here - confirm against the site's URLs.
                string remark = SecondSegment(href, '=');

                AddListItem(name + " " + remark);
                ExecuteInsert(
                    conn,
                    "insert into exerciseOneSort (className, remark) values (@p0, @p1)",
                    name, remark);

                foreach (var subLink in group.Find("dt p a"))
                {
                    string subName = subLink.InnerText();
                    string url = SiteRoot + "/" + subLink.Attribute("href").Value();

                    AddListItem(subName + " " + url + " " + remark);
                    ExecuteInsert(
                        conn,
                        "insert into exerciseTwoSort (className, url, idplus) values (@p0, @p1, @p2)",
                        subName, url, remark);
                }
            }
        }
    }

    // Number of result pages of the sub-category currently being scraped (set by threadTwo).
    // NOTE(review): the initial value was lost in the published listing; it is always
    // overwritten by threadTwo before GetJDData reads it.
    int page = 0;

    // URL of the sub-category currently being scraped (set by threadTwo, read by GetJDData).
    string surl;

    // Worker thread started by button2_Click.
    public static Thread th2;

    /// <summary>
    /// Clears the list box and starts the product scrape on a worker thread.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        listBox1.Items.Clear();
        th2 = new Thread(threadTwo);
        th2.Start();
    }

    /// <summary>
    /// Reads every sub-category URL from exerciseTwoSort, determines how many result pages it
    /// has from the ".goods-page-min label" text, and scrapes those pages via GetJDData.
    /// Runs on the worker thread.
    /// </summary>
    public void threadTwo()
    {
        // The original listing carried commented-out notes on two ways to cope with single
        // quotes in scraped text (a SqlParameter, or doubling the quote). All inserts in this
        // class now use parameters, which makes manual escaping unnecessary.
        DataTable dt = new DataTable();
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        using (SqlDataAdapter adapter = new SqlDataAdapter("select url from exerciseTwoSort", conn))
        {
            // Fill opens and closes the connection itself when it is not already open.
            adapter.Fill(dt);
        }

        for (int i = 0; i < dt.Rows.Count; i++)
        {
            surl = dt.Rows[i][0].ToString();

            HttpUtility http = new HttpUtility();
            string html = http.GetHtmlText(surl);
            var document = new JumonyParser().Parse(html);

            // The pager label is split on '/'; the part after the slash is taken as the page count.
            // NOTE(review): the index was lost in the published listing; index 1 is assumed.
            string pagerText = document.FindFirst(".goods-page-min label").InnerText();
            page = ParsePageCount(pagerText);

            GetJDData();
        }
    }

    /// <summary>
    /// Scrapes pages 1..page of the current sub-category (surl). For every ".piclist li"
    /// product it reads the list data, follows the product link for the item number and star
    /// level, stores the product via bufff and appends a summary line to listBox1.
    /// A product that cannot be read is reported in a message box and skipped.
    /// </summary>
    void GetJDData()
    {
        // NOTE(review): the loop start was lost in the published listing; 1 is assumed
        // because the loop runs while j <= page.
        for (int j = 1; j <= page; j++)
        {
            string pageUrl = surl + "&price_min=0&price_max=0&page=" + j + "&sort=sort_order%20asc,last_update&order=DESC";
            HttpUtility listHttp = new HttpUtility();
            string listHtml = listHttp.GetHtmlText(pageUrl);
            var listDocument = new JumonyParser().Parse(listHtml);

            foreach (var item in listDocument.Find(".piclist li"))
            {
                try
                {
                    // No manual quote doubling here: bufff uses SQL parameters.
                    string title = item.FindFirst(".base a").InnerText();
                    string nowPrice = item.FindFirst(".minprice").InnerText();
                    string oldPrice = item.FindFirst(".maxprice").InnerText();
                    string popular = item.FindFirst(".ratecount strong").InnerText();
                    string sales = item.FindFirst(".soldnum strong").InnerText();
                    string contents = item.FindFirst(".commentcount strong").InnerText();

                    // Item number and star level are only available on the product detail page.
                    string detailUrl = SiteRoot + "/" + item.FindFirst("dt a").Attribute("href").Value();
                    HttpUtility detailHttp = new HttpUtility();
                    string detailHtml = detailHttp.GetHtmlText(detailUrl);
                    var detailDocument = new JumonyParser().Parse(detailHtml);

                    // NOTE(review): the index was lost in the published listing ("Split(':')[]");
                    // index 1 (the text after the colon) is assumed.
                    string numberLine = detailDocument.FindFirst(".promotionMiddleTop p").InnerText();
                    string num = SecondSegment(numberLine, ':');
                    string starLevel = detailDocument.FindFirst(".m-ratescore i").InnerText();

                    bufff(title, nowPrice, oldPrice, popular, sales, num, contents, starLevel);
                    AddListItem(title + " " + nowPrice + " " + num + " " + oldPrice + " " + sales + " " + popular + " " + contents + " " + starLevel);
                }
                catch (Exception ex)
                {
                    // Best effort per product: report what went wrong and continue with the next one.
                    MessageBox.Show(ex.Message, "异常");
                }
            }
        }
    }

    /// <summary>
    /// Inserts one product row into GoodsList using its own short-lived connection.
    /// </summary>
    private static void bufff(string title, string nowPrice, string oldPrice,
        string popular, string sales, string num, string contents, string starLevel)
    {
        // Disposing the connection returns it to the pool; the original leaked one per product.
        using (SqlConnection conn2 = new SqlConnection(ConnectionString))
        {
            conn2.Open();
            ExecuteInsert(
                conn2,
                "insert into GoodsList (name, num, sales, popular, starLevel, contents, price, oldPrice) values (@p0, @p1, @p2, @p3, @p4, @p5, @p6, @p7)",
                title, num, sales, popular, starLevel, contents, nowPrice, oldPrice);
        }
    }

    // Appends a line to listBox1. Controls may only be touched from the thread that created
    // them, so the update is marshalled to the UI thread.
    private void AddListItem(string text)
    {
        this.Invoke((MethodInvoker)delegate { listBox1.Items.Add(text); });
    }

    // Runs an insert whose placeholders are named @p0, @p1, ... in the order of values.
    // Parameters keep quotes in scraped text from breaking (or injecting into) the statement.
    private static void ExecuteInsert(SqlConnection conn, string sql, params string[] values)
    {
        using (SqlCommand cmd = new SqlCommand(sql, conn))
        {
            for (int i = 0; i < values.Length; i++)
            {
                // A null is stored as "" - the same text string concatenation would have produced.
                cmd.Parameters.AddWithValue("@p" + i, values[i] ?? string.Empty);
            }

            cmd.ExecuteNonQuery();
        }
    }

    // Returns the piece after the first occurrence of separator, or "" when there is none.
    private static string SecondSegment(string text, char separator)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        string[] parts = text.Split(separator);
        return parts.Length > 1 ? parts[1] : string.Empty;
    }

    // Parses the page count from the pager text; falls back to a single page when the text
    // is not in the expected "x/y" form so that at least the first page is scraped.
    private static int ParsePageCount(string pagerText)
    {
        int count;
        if (int.TryParse(SecondSegment(pagerText, '/').Trim(), out count) && count > 0)
        {
            return count;
        }

        return 1;
    }
}
}

C#获取网页信息并存入数据库的更多相关文章

  1. C# HttpWebRequest 绝技 根据URL地址获取网页信息

    如果要使用中间的方法的话,可以访问我的帮助类完全免费开源:C# HttpHelper,帮助类,真正的Httprequest请求时无视编码,无视证书,无视Cookie,网页抓取 1.第一招,根据URL地 ...

  2. 使用URLConnection获取网页信息的基本流程

    参考自core java v2, chapter3 Networking. 注:URLConnection的子类HttpURLConnection被广泛用于Android网络客户端编程,它与apach ...

  3. 使用URLConnection获取网页信息的基本流程 分类: H1_ANDROID 2013-10-12 23:51 3646人阅读 评论(0) 收藏

    参考自core java v2, chapter3 Networking. 注:URLConnection的子类HttpURLConnection被广泛用于Android网络客户端编程,它与apach ...

  4. C# 获取网页信息

    获取网页源码 ///通过HttpWebResponse public string GetUrlHtml(string url) { string strHtml = string.Empty; Ht ...

  5. Python 爬虫 招聘信息并存入数据库

    新学习了selenium,啪一下腾讯招聘 from lxml import etree from selenium import webdriver import pymysql def Geturl ...

  6. C#获取网页信息核心方法(入门一)

    目录:信息采集入门系列目录 下面记录的是我自己整理的C#请求页面核心类,主要有如下几个方法 1.HttpWebRequest Get请求获得页面html 2.HttpWebRequest Post请求 ...

  7. python爬虫爬取ip记录网站信息并存入数据库

    import requests import re import pymysql #10页 仔细观察路由 db = pymysql.connect("localhost",&quo ...

  8. python获取网页信息的三种方法

    import urllib.request import http.cookiejar url = 'http://www.baidu.com/' # 方法一 print('方法一') req_one ...

  9. 获取网页上数据(图片、文字、视频)-b

    Demo地址:http://download.csdn.net/detail/u012881779/8831835 获取网页上所有图片.获取所有html.获取网页title.获取网页内容文字... . ...

随机推荐

  1. equals与== 和toString方法

    /** * equals()方法的使用 * * 1.java.lang.Object类中的equals()方法的定义: * * public boolean equals(Object obj) { ...

  2. 04: CI(持续集成)/CD(持续交付/持续部署)

    1.1 持续集成.持续交付 介绍   参考博客:https://www.cnblogs.com/cay83/p/8856231.html 1.传统交付 1. 传统软件的开发与交付的周期都很漫长,从需求 ...

  3. py3 base64加密

    import base64 #字符串编码: encodestr = base64.b64encode('abcr34r344r'.encode('utf-8')) print(str(encodest ...

  4. 使用CefSharp在.NET中嵌入Google kernel

    原文:使用CefSharp在.NET中嵌入Google kernel 使用CefSharp可以在.NET轻松的嵌入Html,不用担心WPF与Winform 控件与它的兼容性问题,CefSharp大部分 ...

  5. Property 'showModal' does not exist on type 'JQuery<HTMLElement>'

    在 TS 代码中使用 jQuery 等库时配合插件使用,但是插件的开发人员并没有为其扩展 jQuery 的类型定义,这是使用插件的方法必然会报 TS 的类型错误,这时候要快速选择忽略该报错的最直接的方 ...

  6. 51nod 2589 快速讨伐

    51nod 如果不考虑升级操作,只有买装备操作和打怪操作,那么首先一定要先买装备,然后可以打死1级的怪,这些怪被打死的时间只要在第一次买装备后面好了,因为现在总操作是\(n+\sum a_i\)个,所 ...

  7. Python基础——函数进阶

    等待更新…………………… 后面再写

  8. Android 一共有多少种动画?准确告诉你!

    Android 动画 Android 动画在开发中是不可或缺的功能,或者说是界面灵动的添加剂.那你是否总结过 Android 中总共为开发者提供了多少种方式的动画呢?今天就为大家总结归纳一下.   报 ...

  9. python 父子节点生成字典

    lines = [(1, 1, '父1节点'), (2, 1, '1-2'), (3, 1, '1-3'), (4, 3, '1-3-4'), (5, 3, '1-3-5'), (6, 3, '1-3 ...

  10. PC端QQ协议说明,完美搞定QQ智能助手

    一. 实验目的: 在虚拟机下NAT模式下通过Wireshark抓包,分析QQ的传输模式.了解QQ在传输信息过程中用到的协议.分析在Nat模式下,信息传输的穿透性. 二. 实验环境: Win7 专业版3 ...