代码语言
.
CSharp
.
JS
Java
Asp.Net
C
MSSQL
PHP
Css
PLSQL
Python
Shell
EBS
ASP
Perl
ObjC
VB.Net
VBS
MYSQL
GO
Delphi
AS
DB2
Domino
Rails
ActionScript
Scala
代码分类
文件
系统
字符串
数据库
网络相关
图形/GUI
多媒体
算法
游戏
Jquery
Extjs
Android
HTML5
菜单
网页交互
WinForm
控件
企业应用
安全与加密
脚本/批处理
开放平台
其它
【
CSharp
】
豆瓣美女抓取
作者:
兜兜里有糖豆
/ 发布于
2014/7/7
/
728
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;

namespace douban
{
    /// <summary>
    /// Console entry point: asks how many listing pages to fetch, extracts the
    /// image URLs from each page and saves every image under <see cref="path"/>.
    /// </summary>
    class Program
    {
        // Destination folder for the downloaded images.
        static string path = @"E:\doubanPic\";

        // Base address used to resolve relative image URLs found in the pages.
        private static readonly Uri SiteRoot = new Uri("http://www.dbmeizi.com");

        /// <summary>
        /// Prompts for a page count, then downloads the images of pages 1..count.
        /// A failure on a single image is reported and skipped; any other failure
        /// is reported and ends the run.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int factNum = ReadPageCount();
            Console.WriteLine("共下载的页数:" + factNum);

            try
            {
                if (!Directory.Exists(path))
                {
                    Directory.CreateDirectory(path);
                }

                System.Diagnostics.Process.Start("explorer.exe", path);

                for (int i = 1; i <= factNum; i++)
                {
                    var ls = GetPageContent(i);
                    Console.WriteLine(string.Format("第{0}页共有图片{1}张", i, ls.Count));

                    var index = 1;
                    foreach (var l in ls)
                    {
                        DownloadImage(l, i, index);
                        index++;
                    }

                    Console.WriteLine(string.Format("下载第{0}页图片结束", i));
                }
            }
            catch (Exception ex)
            {
                // Top-level handler: report the problem instead of hiding it.
                Console.Error.WriteLine("下载过程出错: " + ex.Message);
            }

            Console.WriteLine(string.Format("所有的图片下载结束"));
        }

        /// <summary>
        /// Repeats the prompt until the user enters a positive integer.
        /// </summary>
        /// <returns>The requested page count, or 0 when standard input has ended.</returns>
        private static int ReadPageCount()
        {
            while (true)
            {
                Console.WriteLine("输入想要下载的页数");
                var input = Console.ReadLine();

                // ReadLine returns null at end of input; retrying would loop forever.
                if (input == null)
                {
                    return 0;
                }

                int pages;
                if (int.TryParse(input, out pages) && pages > 0)
                {
                    return pages;
                }

                Console.WriteLine("输入非法");
            }
        }

        /// <summary>
        /// Downloads one image into <see cref="path"/> under a GUID-based file name
        /// and prints a progress line. Network and file errors are reported and the
        /// image is skipped so the remaining images are still attempted.
        /// </summary>
        /// <param name="source">Absolute or site-relative image URL.</param>
        /// <param name="pageNum">Page number, used in the progress message.</param>
        /// <param name="index">1-based image position on the page, used in the progress message.</param>
        private static void DownloadImage(string source, int pageNum, int index)
        {
            // Resolving against the site root handles relative, absolute (http/https)
            // and protocol-relative URLs alike.
            Uri imageUri;
            if (!Uri.TryCreate(SiteRoot, source, out imageUri))
            {
                Console.Error.WriteLine("无法解析的图片地址: " + source);
                return;
            }

            // Take the extension from the URL path only, so a query string never
            // ends up in the file name and a URL without a dot does not throw.
            string picType = Path.GetExtension(imageUri.AbsolutePath);

            try
            {
                FileHelper.DownloadWebFile(imageUri.AbsoluteUri, Guid.NewGuid() + picType, path);
            }
            catch (WebException ex)
            {
                Console.Error.WriteLine(string.Format("下载失败: {0} ({1})", imageUri, ex.Message));
                return;
            }
            catch (IOException ex)
            {
                Console.Error.WriteLine(string.Format("保存失败: {0} ({1})", imageUri, ex.Message));
                return;
            }

            // Restore whatever colour the console had instead of forcing white.
            var previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine(string.Format("下载第{0}页第{1}张图片结束", pageNum, index));
            Console.ForegroundColor = previousColor;
        }

        /// <summary>
        /// Fetches one listing page and returns the image URLs found in it.
        /// </summary>
        /// <param name="pageNum">1-based page number appended to the listing URL.</param>
        /// <returns>The extracted image URLs; an empty list when the page has no content.</returns>
        private static List<String> GetPageContent(int pageNum)
        {
            var url = "http://www.dbmeizi.com/?p=" + pageNum;

            using (var myWebClient = new WebClient())
            {
                myWebClient.Credentials = CredentialCache.DefaultCredentials;

                using (var stream = myWebClient.OpenRead(new Uri(url)))
                {
                    if (stream == null)
                    {
                        return new List<String>();
                    }

                    using (var sr = new StreamReader(stream, Encoding.UTF8))
                    {
                        // ReadToEnd keeps the line breaks, so a tag that is split
                        // across lines still has whitespace between its tokens.
                        var html = sr.ReadToEnd();
                        return PicUrlHelper.GetTextImageSrc(html) ?? new List<String>();
                    }
                }
            }
        }
    }

    /// <summary>
    /// Extracts image addresses from HTML text.
    /// </summary>
    public class PicUrlHelper
    {
        // Matches an <img ...> tag, case-insensitively.
        private static readonly Regex ImgTagPattern =
            new Regex(@"<\s*img\s+([^>]*)\s*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Matches src="..." or src='...'; group 2 is the value between the quotes.
        private static readonly Regex SrcAttributePattern =
            new Regex(@"src\s*=\s*([""'])(.*?)\1", RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Returns the value of the first quoted <c>src</c> attribute of every
        /// <c>img</c> tag in <paramref name="context"/>, in document order.
        /// Tags without a quoted, non-empty <c>src</c> are skipped.
        /// </summary>
        /// <param name="context">HTML text to scan.</param>
        /// <returns>
        /// The list of src values (possibly empty), or <c>null</c> when
        /// <paramref name="context"/> is null or empty.
        /// </returns>
        public static List<String> GetTextImageSrc(string context)
        {
            if (string.IsNullOrEmpty(context))
            {
                return null;
            }

            var imgSrcList = new List<string>();

            foreach (Match tag in ImgTagPattern.Matches(context))
            {
                var src = SrcAttributePattern.Match(tag.Value);
                if (!src.Success)
                {
                    continue;
                }

                // Read the captured group rather than stripping "src=" and quotes
                // by hand, so upper-case SRC and single quotes are handled too.
                var value = src.Groups[2].Value;
                if (value.Length == 0)
                {
                    continue;
                }

                imgSrcList.Add(value);
            }

            return imgSrcList;
        }
    }

    /// <summary>
    /// File download helper.
    /// </summary>
    public class FileHelper
    {
        /// <summary>
        /// Downloads <paramref name="webFileUrl"/> and writes it to
        /// <paramref name="fileName"/> inside <paramref name="parentPath"/>,
        /// creating the directory if needed and replacing any existing file.
        /// </summary>
        /// <param name="webFileUrl">Absolute URL of the file to download.</param>
        /// <param name="fileName">Name of the file to create.</param>
        /// <param name="parentPath">Directory that receives the file.</param>
        public static void DownloadWebFile(string webFileUrl, string fileName, string parentPath)
        {
            if (!Directory.Exists(parentPath))
            {
                Directory.CreateDirectory(parentPath);
            }

            // Path.Combine inserts the separator when parentPath lacks a trailing one.
            var fileFullPath = Path.Combine(parentPath, fileName);

            using (var myWebClient = new WebClient())
            {
                myWebClient.Credentials = CredentialCache.DefaultCredentials;

                // Open the network stream first so a failed request does not
                // leave an empty file behind.
                using (var inStream = myWebClient.OpenRead(new Uri(webFileUrl)))
                // FileMode.Create truncates an existing file; OpenOrCreate would
                // keep stale trailing bytes when the new content is shorter.
                using (var fs = new FileStream(fileFullPath, FileMode.Create, FileAccess.Write))
                {
                    inStream.CopyTo(fs);
                }
            }
        }
    }
}
试试其它关键字
豆瓣
美女
抓取
同语言下
.
文件IO 操作类库
.
Check图片类型[JPEG(.jpg 、.jpeg),TIF,GIF,BMP,PNG,P
.
机器名和IP取得(IPV4 IPV6)
.
Tiff转换Bitmap
.
linqHelper
.
MadieHelper.cs
.
RegHelper.cs
.
如果关闭一个窗体后激活另一个窗体的事件或方法
.
创建日志通用类
.
串口辅助开发类
可能有用的
.
C#实现的html内容截取
.
List 切割成几份 工具类
.
SQL查询 多列合并成一行用逗号隔开
.
一行一行读取txt的内容
.
C#动态修改文件夹名称(FSO实现,不移动文件)
.
c# 移动文件或文件夹
.
c#图片添加水印
.
Java PDF转换成图片并输出给前台展示
.
网站后台修改图片尺寸代码
.
处理大图片在缩略图时的展示
兜兜里有糖豆
贡献的其它代码
(
1
)
.
豆瓣美女抓取
Copyright © 2004 - 2024 dezai.cn. All Rights Reserved
站长博客
粤ICP备13059550号-3