C++实现网络爬虫
作者:网络转载 发布时间:[ 2014/8/27 10:37:54 ] 推荐标签:C++ 网络爬虫 NET
C++实现网络爬虫
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include <queue>
#include <string>
#include <utility>
#include <regex>
#include <fstream>
#include <WinSock2.h>
#include <Windows.h>
#pragma comment(lib, "ws2_32.lib")
using namespace std;
// Initializes Winsock for this process, requesting version 2.0.
// WSAStartup returns zero on success and an error code otherwise; the
// original discarded that code, so a failed initialization went unnoticed
// until a later socket call failed. The signature stays void, so the
// failure is reported on stderr instead of being returned.
void startupWSA()
{
    WSADATA wsadata;
    const int rc = WSAStartup( MAKEWORD(2,0), &wsadata);
    if ( rc != 0 )
        std::cerr << "WSAStartup failed, error code " << rc << '\n';
}
inline void cleanupWSA()
{
WSACleanup();
}
// Splits `str` in two around the FIRST occurrence of the delimiter `dilme`.
//   - delimiter found:     { text before it, text after it } (delimiter dropped)
//   - delimiter not found: { str, "" }
// An empty delimiter matches at position 0, which yields { "", str }.
inline pair<string, string> binaryString(const string &str, const string &dilme)
{
    const string::size_type cut = str.find(dilme);

    // No delimiter: everything belongs to the first half.
    if ( cut == string::npos )
        return pair<string, string>(str, string());

    const string head = str.substr(0, cut);
    const string tail = str.substr(cut + dilme.size());
    return pair<string, string>(head, tail);
}
// Resolves `hostName` with gethostbyname and returns the first address of
// the result formatted by inet_ntoa, or an empty string when resolution
// fails.
//
// Compared with the original one-liner, this also returns "" when the
// address list is empty or when inet_ntoa yields a null pointer; either
// case would otherwise hand a null pointer to the std::string constructor,
// which is undefined behaviour.
//
// NOTE(review): like the original, this treats the first entry as an IPv4
// in_addr without checking the address family - confirm that is acceptable.
inline string getIpByHostName(const string &hostName)
{
    const hostent *phost = gethostbyname( hostName.c_str() );
    if ( !phost || !phost->h_addr_list || !phost->h_addr_list[0] )
        return string();

    const char *dotted = inet_ntoa(*reinterpret_cast<const in_addr *>(phost->h_addr_list[0]));
    return dotted ? string(dotted) : string();
}
// Opens a TCP connection to port 80 of `hostName`.
//
// Returns the connected socket, or 0 when the host name cannot be resolved,
// the socket cannot be created, or the connection attempt fails.
// The failure value is 0 (not INVALID_SOCKET), exactly as in the original;
// it is kept because callers outside this block may test for it.
//
// Fixes over the original:
//   - the socket is closed when the connection attempt fails, so the handle
//     no longer leaks on every unreachable host;
//   - the address structure is zero-initialized, so its padding bytes are
//     not left indeterminate.
inline SOCKET connect(const string &hostName)
{
    const auto ip = getIpByHostName(hostName);
    if ( ip.empty() )
        return 0;

    const auto sock = socket(AF_INET, SOCK_STREAM, 0);
    if ( sock == INVALID_SOCKET )
        return 0;

    SOCKADDR_IN addr = {};
    addr.sin_family = AF_INET;
    addr.sin_port = htons(80);
    addr.sin_addr.s_addr = inet_addr(ip.c_str());

    // `::connect` is the Winsock call; overload resolution separates it from
    // this function by its parameter list.
    if ( ::connect(sock, reinterpret_cast<const sockaddr *>(&addr), sizeof(addr)) == SOCKET_ERROR )
    {
        closesocket(sock); // release the handle instead of leaking it
        return 0;
    }
    return sock;
}
// Sends a minimal HTTP/1.1 GET request for resource `get` on `host` over the
// already-connected socket `sock`.
//
// Returns true only when the entire request was handed to send(); returns
// false as soon as send() reports an error or makes no progress.
//
// Fixes over the original:
//   - the string literals were split across physical lines, which does not
//     compile; the line breaks are written as "\r\n" escapes, the line
//     terminator HTTP requires;
//   - the request now ends with an empty line, which marks the end of the
//     header section; without it a server keeps waiting for more headers;
//   - send() may accept fewer bytes than requested, so the remainder is
//     re-sent in a loop instead of treating a short write as failure;
//   - the byte count is checked as a signed value rather than compared
//     directly against the unsigned string size.
inline bool sendRequest(SOCKET sock, const string &host, const string &get)
{
    const string http
        = "GET " + get + " HTTP/1.1\r\n"
        + "HOST: " + host + "\r\n"
        + "Connection: close\r\n"
        + "\r\n";

    string::size_type done = 0;
    while ( done < http.size() )
    {
        const int sent = send(sock, http.data() + done, static_cast<int>(http.size() - done), 0);
        if ( sent <= 0 ) // error (negative) or no progress: give up
            return false;
        done += static_cast<string::size_type>(sent);
    }
    return true;
}
inline string recvRequest(SOCKET sock)
{
static timeval wait = {2, 0};
static auto buffer = string(2048 * 100, '