使用Java抓取网页数据
作者:网络转载 发布时间:[ 2016/1/11 11:49:52 ] 推荐标签:测试开发技术 Java
二、使用HttpPost抓取网页数据
/** HTTP client; rebuilt and reassigned by every {@code getHtml} call. */
private static CloseableHttpClient httpClient;

/** Execution context; reassigned by every {@code getHtml} call. */
private static BasicHttpContext httpContext;

/** Cookie store; reassigned by every {@code getHtml} call. */
private static BasicCookieStore cookieStore;

/** Connection pool handed to every client that {@code getHtml} builds. */
private static PoolingHttpClientConnectionManager cm =
        new PoolingHttpClientConnectionManager();

/** Default request configuration installed on the client (BEST_MATCH cookie spec). */
private static RequestConfig globalConfig =
        RequestConfig.custom()
                .setCookieSpec(CookieSpecs.BEST_MATCH)
                .build();

/** Per-request configuration: a copy of {@code globalConfig} switched to BROWSER_COMPATIBILITY. */
private static RequestConfig localConfig =
        RequestConfig.copy(globalConfig)
                .setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY)
                .build();
/**
 * Sends a form-encoded POST request to {@code url} and returns the response body
 * as produced by {@code trimLineToString} (decoded as UTF-8).
 *
 * <p>The request carries fixed browser-like headers, a hard-coded {@code Cookie}
 * header and two hard-coded form fields ({@code pid}, {@code CurrentlyPageIndex}).
 * NOTE(review): the cookie and form values look like a captured browser session for
 * one specific site - confirm before reusing this method against other targets.
 *
 * @param url address the POST request is sent to
 * @return the condensed response body when the status is 2xx and the response has an
 *         entity; {@code null} when the status is not 2xx, when there is no entity, or
 *         when an I/O or protocol error occurs (the stack trace is printed)
 */
public String getHtml(String url){
    // Work on locals so that this call always executes with the client, context and
    // cookie store it created itself, even if another call reassigns the static
    // fields in the meantime.
    BasicCookieStore store = new BasicCookieStore();
    HttpClientBuilder builder = HttpClients.custom();
    builder.setConnectionManager(cm);
    builder.setDefaultCookieStore(store);
    builder.setDefaultRequestConfig(globalConfig);
    CloseableHttpClient client = builder.build();
    BasicHttpContext context = new BasicHttpContext();
    context.setAttribute(ClientContext.COOKIE_STORE, store);

    // The original code stored these objects in the static fields; keep doing so in
    // case other members of the class (not visible here) read them.
    cookieStore = store;
    httpClient = client;
    httpContext = context;

    HttpPost httpPost = new HttpPost(url);
    httpPost.setConfig(localConfig);
    httpPost.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    httpPost.setHeader("Accept-Encoding","gzip, deflate");
    httpPost.setHeader("Accept-Language","zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
    httpPost.setHeader("Connection","keep-alive");
    httpPost.setHeader("Cookie","ASP.NET_SessionId=11vrr4ucwsgeqtmpyfx4hmvx; _5t_trace_sid=89c4ffb8633d267e4ae322a157b52471; _5t_trace_tms=1; CheckCode=X0P64");
    httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0");

    // Form body of the POST request.
    List<NameValuePair> nvps = new ArrayList<NameValuePair>();
    nvps.add(new BasicNameValuePair("pid", "99-C3-57-35-6D-70-3D-F2"));
    nvps.add(new BasicNameValuePair("CurrentlyPageIndex", "2"));
    httpPost.setEntity(new UrlEncodedFormEntity(nvps, Consts.UTF_8));

    CloseableHttpResponse response = null;
    try {
        response = client.execute(httpPost, context);
        int status = response.getStatusLine().getStatusCode();
        if (status >= 200 && status < 300) {
            HttpEntity httpEntity = response.getEntity();
            if (httpEntity != null) {
                String cont = trimLineToString(httpEntity, "UTF-8");
                EntityUtils.consume(httpEntity);
                return cont;
            }
        }
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always close the response: without this, a non-2xx status or a missing
        // entity left the pooled connection checked out forever.
        if (response != null) {
            try {
                response.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return null;
}
/**
 * Reads the entity content line by line, drops blank lines, trims the remaining
 * lines and concatenates them with no separator between them.
 *
 * <p>Reading is best-effort: an {@link IOException} or {@link IllegalStateException}
 * raised while reading is printed and whatever was collected up to that point is
 * returned. The content stream is always closed before returning.
 *
 * <p>The method uses only its arguments and local variables, so it is not
 * {@code synchronized}; the class-wide lock it used to take made every caller wait
 * while another caller was blocked on network I/O.
 *
 * @param entiry  entity whose content is read
 * @param charset name of the charset used to decode the content
 * @return the concatenated trimmed non-blank lines; an empty string when the entity
 *         has no content stream or nothing could be read
 */
public static String trimLineToString(HttpEntity entiry,String charset) {
    // Local buffer, never shared between threads, so StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    InputStream instream = null;
    BufferedReader reader = null;
    try {
        instream = entiry.getContent();
        // Guard against a missing content stream instead of failing with an NPE.
        if (instream != null) {
            reader = new BufferedReader(new InputStreamReader(instream, charset));
            String str;
            while ((str = reader.readLine()) != null) {
                if (StringUtils.isNotBlank(str)) {
                    sb.append(str.trim());
                }
            }
        }
    } catch (IllegalStateException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (reader != null) {
                // Closing the reader also closes the wrapped content stream.
                reader.close();
            } else if (instream != null) {
                // The reader was never created (for example the charset name is not
                // supported), so the raw stream has to be closed directly.
                instream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return sb.toString();
}
本文内容不用于商业目的,如涉及知识产权问题,请权利人联系SPASVO小编(021-61079698-8054),我们将立即处理,马上删除。
相关推荐
Java性能测试有哪些不为众人所知的原则?Java设计模式——装饰者模式谈谈Java中遍历Map的几种方法Java Web入门必知你需要理解的Java反射机制知识总结编写更好的Java单元测试的7个技巧编程常用的几种时间戳转换(java .net 数据库)适合Java开发者学习的Python入门教程Java webdriver如何获取浏览器新窗口中的元素?Java重写与重载(区别与用途)Java变量的分类与初始化JavaScript有这几种测试分类Java有哪四个核心技术?给 Java开发者的10个大数据工具和框架Java中几个常用设计模式汇总java生态圈常用技术框架、开源中间件,系统架构及经典案例等
更新发布
功能测试和接口测试的区别
2023/3/23 14:23:39如何写好测试用例文档
2023/3/22 16:17:39常用的选择回归测试的方式有哪些?
2022/6/14 16:14:27测试流程中需要重点把关几个过程?
2021/10/18 15:37:44性能测试的七种方法
2021/9/17 15:19:29全链路压测优化思路
2021/9/14 15:42:25性能测试流程浅谈
2021/5/28 17:25:47常见的APP性能测试指标
2021/5/8 17:01:11热门文章
常见的移动App Bug——崩溃的测试用例设计如何用Jmeter做压力测试QC使用说明APP压力测试入门教程移动app测试中的主要问题jenkins+testng+ant+webdriver持续集成测试使用JMeter进行HTTP负载测试Selenium 2.0 WebDriver 使用指南