1. 获取商品分类信息

给定一个网页,获取该网页上商品信息的分类,并存入数据库

using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient; namespace catchGoods
{
/// <summary>
/// Main window of the scraper. Clicking button1 downloads the shop's home
/// page on a worker thread, lists every top-level category in listBox1 and
/// stores the categories and their sub-categories in SQL Server.
/// </summary>
public partial class Form1 : Form
{
    // NOTE(review): credentials are hard-coded exactly as in the original
    // listing; move them to configuration before using this outside a demo.
    private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

    // Home page that carries the category menu.
    private const string SiteRoot = "http://www.htluxe.com";

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Worker thread started by the most recent click on button1.</summary>
    public static Thread th;

    /// <summary>Starts the category scrape on a worker thread so the UI stays responsive.</summary>
    private void button1_Click(object sender, EventArgs e)
    {
        th = new Thread(GetJDData);
        // Background thread: closing the form must not leave the process alive.
        th.IsBackground = true;
        th.Start();
    }

    /// <summary>
    /// Downloads the home page, walks every ".categroup dl" element and
    /// inserts one row per category into exerciseOneSort and one row per
    /// sub-category link ("dt p a") into exerciseTwoSort.
    /// </summary>
    /// <remarks>Runs on the worker thread; UI access is marshalled with Invoke.</remarks>
    public void GetJDData()
    {
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        {
            conn.Open();

            HttpUtility http = new HttpUtility();
            string html = http.GetHtmlText(SiteRoot);
            var document = new JumonyParser().Parse(html);

            foreach (var item in document.Find(".categroup dl"))
            {
                var heading = item.FindFirst("h4 a");
                string name = heading.InnerText();
                string href = heading.Attribute("href").Value();
                // The index was lost from the published listing ("Split('=')[]").
                // Presumably the link looks like "...?id=NN" and the id is the
                // part after '=' - TODO confirm against the live markup.
                string remark = ValueAfter(href, '=');

                // WinForms controls may only be touched from the thread that
                // created them, so the list box update is marshalled to the UI thread.
                this.Invoke((MethodInvoker)delegate
                {
                    listBox1.Items.Add(name);
                });

                // Parameters instead of string concatenation: scraped text can
                // contain quotes, which would break (or inject into) the statement.
                using (SqlCommand insertCategory = new SqlCommand(
                    "insert into exerciseOneSort (className, remark) values (@className, @remark)", conn))
                {
                    AddText(insertCategory, "@className", name);
                    AddText(insertCategory, "@remark", remark);
                    insertCategory.ExecuteNonQuery();
                }

                foreach (var element in item.Find("dt p a"))
                {
                    string nameTwo = element.InnerText();
                    string url = SiteRoot + "/" + element.Attribute("href").Value();

                    using (SqlCommand insertSubCategory = new SqlCommand(
                        "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)", conn))
                    {
                        AddText(insertSubCategory, "@className", nameTwo);
                        AddText(insertSubCategory, "@url", url);
                        AddText(insertSubCategory, "@idplus", remark);
                        insertSubCategory.ExecuteNonQuery();
                    }
                }
            }
        }
    }

    /// <summary>
    /// Returns the second segment of <paramref name="text"/> when split on
    /// <paramref name="separator"/>, or an empty string when there is none.
    /// </summary>
    private static string ValueAfter(string text, char separator)
    {
        if (text == null)
        {
            return string.Empty;
        }

        string[] parts = text.Split(separator);
        return parts.Length > 1 ? parts[1] : string.Empty;
    }

    /// <summary>Adds a text parameter, sending an empty string in place of null.</summary>
    private static void AddText(SqlCommand command, string parameterName, string value)
    {
        command.Parameters.AddWithValue(parameterName, value ?? string.Empty);
    }
}
}

完整版

using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json; namespace catchGoods
{
/// <summary>
/// Main window of the scraper (full version).
/// button1 scrapes the category tree from the shop's home page into
/// exerciseOneSort / exerciseTwoSort; button2 then visits every stored
/// sub-category URL and scrapes the goods it lists into GoodsList.
/// Both jobs run on worker threads and report progress in listBox1.
/// </summary>
public partial class Form1 : Form
{
    // NOTE(review): credentials are hard-coded exactly as in the original
    // listing; move them to configuration before using this outside a demo.
    private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

    // Home page that carries the category menu; also the prefix for relative links.
    private const string SiteRoot = "http://www.htluxe.com";

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Worker thread started by the most recent click on button1.</summary>
    public static Thread th;

    /// <summary>Starts the category scrape on a worker thread.</summary>
    /// <remarks>
    /// The published listing kept a commented-out "delete from exerciseTwoSort"
    /// here for clearing the table between runs; run it manually if needed.
    /// </remarks>
    private void button1_Click(object sender, EventArgs e)
    {
        th = new Thread(GetJDDataOne);
        // Background thread: closing the form must not leave the process alive.
        th.IsBackground = true;
        th.Start();
    }

    /// <summary>
    /// Downloads the home page, walks every ".categroup dl" element, shows
    /// each category and sub-category in listBox1 and inserts them into
    /// exerciseOneSort and exerciseTwoSort respectively.
    /// </summary>
    public void GetJDDataOne()
    {
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        {
            conn.Open();

            HttpUtility http = new HttpUtility();
            string html = http.GetHtmlText(SiteRoot);
            var document = new JumonyParser().Parse(html);

            foreach (var item in document.Find(".categroup dl"))
            {
                var heading = item.FindFirst("h4 a");
                string name = heading.InnerText();
                string href = heading.Attribute("href").Value();
                // The index was lost from the published listing ("Split('=')[]").
                // Presumably the link looks like "...?id=NN" and the id is the
                // part after '=' - TODO confirm against the live markup.
                string remark = ValueAfter(href, '=');

                // WinForms controls may only be touched from the thread that
                // created them, so list box updates are marshalled to the UI thread.
                this.Invoke((MethodInvoker)delegate
                {
                    listBox1.Items.Add(name + " " + remark);
                });

                // Parameters instead of string concatenation: scraped text can
                // contain quotes, which would break (or inject into) the statement.
                using (SqlCommand insertCategory = new SqlCommand(
                    "insert into exerciseOneSort (className, remark) values (@className, @remark)", conn))
                {
                    AddText(insertCategory, "@className", name);
                    AddText(insertCategory, "@remark", remark);
                    insertCategory.ExecuteNonQuery();
                }

                foreach (var element in item.Find("dt p a"))
                {
                    string nameTwo = element.InnerText();
                    string url = SiteRoot + "/" + element.Attribute("href").Value();

                    this.Invoke((MethodInvoker)delegate
                    {
                        listBox1.Items.Add(nameTwo + " " + url + " " + remark);
                    });

                    using (SqlCommand insertSubCategory = new SqlCommand(
                        "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)", conn))
                    {
                        AddText(insertSubCategory, "@className", nameTwo);
                        AddText(insertSubCategory, "@url", url);
                        AddText(insertSubCategory, "@idplus", remark);
                        insertSubCategory.ExecuteNonQuery();
                    }
                }
            }
        }
    }

    // Number of result pages of the sub-category currently being scraped;
    // written by threadTwo, read by GetJDData.
    int page = 0;

    // URL of the sub-category currently being scraped; written by threadTwo,
    // read by GetJDData.
    string surl;

    /// <summary>Worker thread started by the most recent click on button2.</summary>
    public static Thread th2;

    /// <summary>Clears the list box and starts the goods scrape on a worker thread.</summary>
    /// <remarks>
    /// The published listing kept a commented-out "delete from GoodsList" here
    /// for clearing the table between runs; run it manually if needed.
    /// </remarks>
    private void button2_Click(object sender, EventArgs e)
    {
        listBox1.Items.Clear();
        th2 = new Thread(threadTwo);
        th2.IsBackground = true;
        th2.Start();
    }

    /// <summary>
    /// Reads every url stored in exerciseTwoSort, determines how many result
    /// pages that sub-category has and calls <see cref="GetJDData"/> for it.
    /// </summary>
    public void threadTwo()
    {
        // The article discussed two ways to store text that contains a single
        // quote: SqlCommand parameters, or doubling the quote with Replace.
        // This class uses parameters for every insert.
        DataTable dt = new DataTable();
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        using (SqlDataAdapter adapter = new SqlDataAdapter("select url from exerciseTwoSort", conn))
        {
            conn.Open();
            adapter.Fill(dt);
        }

        for (int i = 0; i < dt.Rows.Count; i++)
        {
            surl = dt.Rows[i][0].ToString();

            HttpUtility httpTwo = new HttpUtility();
            string htmlTwo = httpTwo.GetHtmlText(surl);
            var document = new JumonyParser().Parse(htmlTwo);

            string pagerText = document.FindFirst(".goods-page-min label").InnerText();
            page = ParsePageCount(pagerText);
            GetJDData();
        }
    }

    /// <summary>
    /// Scrapes pages 1..<c>page</c> of the sub-category at <c>surl</c>: for
    /// every ".piclist li" item it reads the listing fields, opens the item's
    /// detail page for the goods number and star level, stores the row via
    /// <see cref="bufff"/> and shows it in listBox1.
    /// </summary>
    void GetJDData()
    {
        // NOTE(review): the start value was lost from the published listing;
        // 1 is assumed because the upper bound is inclusive ("j <= page").
        for (int j = 1; j <= page; j++)
        {
            string pageUrl = surl + "&price_min=0&price_max=0&page=" + j + "&sort=sort_order%20asc,last_update&order=DESC";
            HttpUtility httpMid = new HttpUtility();
            string htmlMid = httpMid.GetHtmlText(pageUrl);
            var listing = new JumonyParser().Parse(htmlMid);

            foreach (var item in listing.Find(".piclist li"))
            {
                // No manual quote doubling here: the insert is parameterised,
                // so the title is stored and displayed exactly as scraped.
                string title = item.FindFirst(".base a").InnerText();

                try
                {
                    string nowPrice = item.FindFirst(".minprice").InnerText();
                    string oldPrice = item.FindFirst(".maxprice").InnerText();
                    string popular = item.FindFirst(".ratecount strong").InnerText();
                    string sales = item.FindFirst(".soldnum strong").InnerText();
                    string contents = item.FindFirst(".commentcount strong").InnerText();

                    string detailUrl = SiteRoot + "/" + item.FindFirst("dt a").Attribute("href").Value();
                    HttpUtility httpThree = new HttpUtility();
                    string detailHtml = httpThree.GetHtmlText(detailUrl);
                    var detail = new JumonyParser().Parse(detailHtml);

                    string promotionText = detail.FindFirst(".promotionMiddleTop p").InnerText();
                    // Presumably "label:value"; the index was lost from the
                    // published listing - TODO confirm against the live markup.
                    string num = ValueAfter(promotionText, ':');
                    string starLevel = detail.FindFirst(".m-ratescore i").InnerText();

                    bufff(title, nowPrice, oldPrice, popular, sales, num, contents, starLevel);

                    // Marshal the list box update to the UI thread.
                    this.Invoke((MethodInvoker)delegate
                    {
                        listBox1.Items.Add(title + " " + nowPrice + " " + num + " " + oldPrice + " " + sales + " " + popular + " " + contents + " " + starLevel);
                    });
                }
                catch (Exception ex)
                {
                    // Best effort: one broken item must not abort the whole run,
                    // but say what went wrong instead of a bare "异常".
                    MessageBox.Show("异常: " + ex.Message);
                }
            }
        }
    }

    /// <summary>Inserts one scraped goods row into GoodsList.</summary>
    /// <remarks>
    /// Opens its own connection and disposes it before returning, so
    /// connections are returned to the pool instead of leaking one per row.
    /// </remarks>
    private static void bufff(string title, string nowPrice, string oldPrice,
        string popular, string sales, string num, string contents, string starLevel)
    {
        const string sql =
            "insert into GoodsList (name, num, sales, popular, starLevel, contents, price, oldPrice) " +
            "values (@name, @num, @sales, @popular, @starLevel, @contents, @price, @oldPrice)";

        using (SqlConnection conn2 = new SqlConnection(ConnectionString))
        using (SqlCommand com2 = new SqlCommand(sql, conn2))
        {
            AddText(com2, "@name", title);
            AddText(com2, "@num", num);
            AddText(com2, "@sales", sales);
            AddText(com2, "@popular", popular);
            AddText(com2, "@starLevel", starLevel);
            AddText(com2, "@contents", contents);
            AddText(com2, "@price", nowPrice);
            AddText(com2, "@oldPrice", oldPrice);

            conn2.Open();
            com2.ExecuteNonQuery();
        }
    }

    /// <summary>
    /// Extracts the page count from pager text.
    /// Returns 1 when the text has no parsable number after '/'.
    /// </summary>
    private static int ParsePageCount(string pagerText)
    {
        // Presumably the pager reads "current/total"; the index was lost from
        // the published listing - TODO confirm against the live markup.
        int count;
        return int.TryParse(ValueAfter(pagerText, '/'), out count) ? count : 1;
    }

    /// <summary>
    /// Returns the second segment of <paramref name="text"/> when split on
    /// <paramref name="separator"/>, or an empty string when there is none.
    /// </summary>
    private static string ValueAfter(string text, char separator)
    {
        if (text == null)
        {
            return string.Empty;
        }

        string[] parts = text.Split(separator);
        return parts.Length > 1 ? parts[1] : string.Empty;
    }

    /// <summary>Adds a text parameter, sending an empty string in place of null.</summary>
    private static void AddText(SqlCommand command, string parameterName, string value)
    {
        command.Parameters.AddWithValue(parameterName, value ?? string.Empty);
    }
}
}

C#获取网页信息并存入数据库的更多相关文章

  1. C# HttpWebRequest 绝技 根据URL地址获取网页信息

    如果要使用中间的方法的话,可以访问我的帮助类完全免费开源:C# HttpHelper,帮助类,真正的Httprequest请求时无视编码,无视证书,无视Cookie,网页抓取 1.第一招,根据URL地 ...

  2. 使用URLConnection获取网页信息的基本流程

    参考自core java v2, chapter3 Networking. 注:URLConnection的子类HttpURLConnection被广泛用于Android网络客户端编程,它与apach ...

  3. 使用URLConnection获取网页信息的基本流程 分类: H1_ANDROID 2013-10-12 23:51 3646人阅读 评论(0) 收藏

    参考自core java v2, chapter3 Networking. 注:URLConnection的子类HttpURLConnection被广泛用于Android网络客户端编程,它与apach ...

  4. C# 获取网页信息

    获取网页源码 ///通过HttpWebResponse public string GetUrlHtml(string url) { string strHtml = string.Empty; Ht ...

  5. Python 爬虫 招聘信息并存入数据库

    新学习了selenium,啪一下腾讯招聘 from lxml import etree from selenium import webdriver import pymysql def Geturl ...

  6. C#获取网页信息核心方法(入门一)

    目录:信息采集入门系列目录 下面记录的是我自己整理的C#请求页面核心类,主要有如下几个方法 1.HttpWebRequest Get请求获得页面html 2.HttpWebRequest Post请求 ...

  7. python爬虫爬取ip记录网站信息并存入数据库

    import requests import re import pymysql #10页 仔细观察路由 db = pymysql.connect("localhost"," ...

  8. python获取网页信息的三种方法

    import urllib.request import http.cookiejar url = 'http://www.baidu.com/' # 方法一 print('方法一') req_one ...

  9. 获取网页上数据(图片、文字、视频)-b

    Demo地址:http://download.csdn.net/detail/u012881779/8831835 获取网页上所有图片.获取所有html.获取网页title.获取网页内容文字... . ...

随机推荐

  1. A9-USART2_RX_BUF 串口2收发异常

    a9_send_cmd(); //退出透传模式,和前一次发送时间超过 2 秒,输入+++,就可以退出透传模式 delay_ms(); delay_ms(); delay_ms(); a9_quit_t ...

  2. numpy数组的索引和切片

    numpy数组的索引和切片 基本切片操作 >>> import numpy as np >>> arr=np.arange(10) >>> arr ...

  3. laravel框架之修改

    //控制器層 public function update(request $request) { $id = $request->get('id'); $data = DB::select(& ...

  4. Linux普通用户无法使用sudo

    问题描述: jenkins执行发布脚本,因为使用的是jenkins用户,所以有些shell命令需要 sudo 来执行,导致报错. + sudo rm -rf /usr/share/nginx/html ...

  5. python 链接mysql 事务

    import mysql.connector try: con = mysql.connector.connect( host="localhost", port="33 ...

  6. 深入解析Vue.js中v-bind v-model的使用和区别

    v-model 指令在表单控件元素上创建双向数据绑定,所谓双向绑定,指的就是我们在js中的vue实例中的data与其渲染的dom元素上的内容保持一致,两者无论谁被改变,另一方也会相应的更新为相同的数据 ...

  7. py3 base64加密

    import base64 #字符串编码: encodestr = base64.b64encode('abcr34r344r'.encode('utf-8')) print(str(encodest ...

  8. TCP和UDP头部格式的了解?

    tcp头部格式如下图所示: 1.源端口号,16位,发送方的端口号. 2.目标端口号,16位,发送方的目标端口号. 3.  32为序列号,sequence number,保证网络传输数据的顺序性. 4. ...

  9. npm学习(五)之使用package.json

    使用package.json 管理本地安装的npm包的最佳方法是创建一个package.json文件. 一个packagejson文件: 列出项目所依赖的包. 允许使用语义版本控制规则指定项目可以使用 ...

  10. 正则表达式、原始字符串及re

    正则表达式.原始字符串及re re是python中的一个文本解析工具,常用的方法有: 来源:https://www.ibm.com/developerworks/cn/opensource/os-cn ...