用httpclient抓取火车票信息
作者:网络转载 发布时间:[ 2017/1/4 11:01:30 ] 推荐标签:测试开发技术 Java 数据库
这是一个通过 HttpClient 抓取火车票信息的示例程序,需要稍作修改才能跑通:需要自行封装 HttpClient 工具类(即代码中的 HttpClientUtil),并以 GET 方式发起请求;返回的 JSON 数据需要借助 fastjson 自行解析。catchTrainInfo() 是入口方法。
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import javax.annotation.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Crawls train information in alternating passes: station names are turned into
 * train numbers, and train numbers are turned into further station names.
 *
 * <p>{@link #catchTrainInfo()} is the entry point.
 *
 * <p>NOTE(review): {@code HttpClientUtil} and {@code JSONObject} are referenced but
 * neither declared nor imported in this file; they must be supplied by the project
 * ({@code JSONObject} is presumably fastjson's class - confirm).
 */
public class CatchTrainInfo {

    private static final Logger log = LoggerFactory.getLogger(CatchTrainInfo.class);

    /** Page whose anchors list the station names used to seed the crawl. */
    private static final String STATION_LIST_URL = "http://www.59178.com/zhan/";

    /** Prefix of the "trains by station" query; the encoded station name is appended. */
    private static final String STATION_INFO_URL = "http://train.qunar.com/qunar/stationInfo.jsp?q=";

    /** Prefix of the "train details" query; the encoded train number is appended. */
    private static final String TRAIN_INFO_URL = "http://train.qunar.com/qunar/checiInfo.jsp?q=";

    /**
     * Date sent with every train-details query.
     * NOTE(review): fixed value carried over unchanged; it probably has to be
     * updated to a current date for the remote service - confirm.
     */
    private static final String QUERY_DATE = "20170107";

    /** Marker for a train whose details have not been requested yet. */
    private static final String UNUSED = "unused";

    /** Marker for a train whose details have already been requested. */
    private static final String USED = "used";

    /** Number of additional crawl rounds after the initial one (hard cap against endless crawling). */
    private static final int EXTRA_ROUNDS = 3;

    /** Pause after each remote request, to stay below anti-crawler thresholds. */
    private static final long REQUEST_PAUSE_MILLIS = 100L;

    /**
     * Crawls train information, starting from the station list page.
     *
     * <p>Each round requests the trains of all pending stations, then requests the
     * details of every train not processed yet; stations found in those details that
     * have not been crawled before become the pending stations of the next round.
     * At most {@code 1 + EXTRA_ROUNDS} rounds are executed; the crawl stops earlier
     * when no new station is found or when the current thread is interrupted.
     *
     * @return always {@code "success!"}
     */
    public String catchTrainInfo() {
        // Every station whose train list has already been requested.
        Set<String> visited = new HashSet<String>();
        // Train number -> UNUSED / USED.
        Map<String, String> train = new HashMap<String, String>();

        HashSet<String> pending = parseStations(HttpClientUtil.get(STATION_LIST_URL));

        for (int round = 0; round <= EXTRA_ROUNDS; round++) {
            if (pending.isEmpty() || Thread.currentThread().isInterrupted()) {
                break;
            }
            getTrainsByStation(pending, train);
            visited.addAll(pending);

            // Collect into a fresh set so nothing is removed from a set while it is
            // being iterated, and filter against ALL stations crawled so far.
            HashSet<String> discovered = new HashSet<String>();
            getTrainDetailsByTrains(discovered, train);
            discovered.removeAll(visited);
            pending = discovered;
        }
        return "success!";
    }

    /**
     * Extracts the station names from the station list page.
     *
     * @param stationHtml page content; may be {@code null}
     * @return the non-empty station names found, possibly an empty set
     */
    private HashSet<String> parseStations(String stationHtml) {
        HashSet<String> stations = new HashSet<String>();
        if (stationHtml == null) {
            log.warn("Station list page could not be loaded: " + STATION_LIST_URL);
            return stations;
        }
        String[] fragments = stationHtml.split("htm'>");
        // Fragment 0 is everything before the first anchor and holds no station name.
        for (int i = 1; i < fragments.length; i++) {
            String stationName = getStation(fragments[i]);
            if (!"".equals(stationName)) {
                stations.add(stationName);
            }
        }
        return stations;
    }

    /**
     * Requests the details of every train still marked unused, then marks it used.
     *
     * @param stationTmp receives the station names found in the train details
     * @param train train number to state map; states are updated in place
     */
    private void getTrainDetailsByTrains(HashSet<String> stationTmp, Map<String, String> train) {
        for (Entry<String, String> entry : train.entrySet()) {
            if (!UNUSED.equals(entry.getValue())) {
                // Already processed: no request is made, so no pause is needed either.
                continue;
            }
            getAndSaveTrainsDetails(entry.getKey(), stationTmp);
            entry.setValue(USED);
            if (!pauseBetweenRequests()) {
                return;
            }
        }
    }

    /**
     * Requests the trains of every given station and records them in {@code train}.
     *
     * @param station station names to query
     * @param train train number to state map that receives newly found trains
     */
    private void getTrainsByStation(Set<String> station, Map<String, String> train) {
        for (String stationName : station) {
            getTrainsInfo(train, stationName);
            if (!pauseBetweenRequests()) {
                return;
            }
        }
    }

    /**
     * Sleeps for {@link #REQUEST_PAUSE_MILLIS}.
     *
     * @return {@code false} if the thread was interrupted (the interrupt flag is
     *         restored so callers can stop), {@code true} otherwise
     */
    private boolean pauseBetweenRequests() {
        try {
            Thread.sleep(REQUEST_PAUSE_MILLIS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.warn("Interrupted while pausing between requests; stopping the crawl");
            return false;
        }
    }

    /**
     * URL-encodes a query value as UTF-8.
     *
     * @param value text to encode
     * @param failurePrefix log message prefix used if the encoding is unavailable
     * @return the encoded value, or {@code value} unchanged if encoding failed
     */
    private String urlEncode(String value, String failurePrefix) {
        try {
            return URLEncoder.encode(value, "utf-8");
        } catch (UnsupportedEncodingException e) {
            log.error(failurePrefix + value, e);
            return value;
        }
    }

    /**
     * Requests the trains serving one station and adds the unknown ones to
     * {@code train} as unused. Failures are logged and otherwise ignored.
     *
     * @param train train number to state map
     * @param stationName station to query (not yet URL-encoded)
     */
    private void getTrainsInfo(Map<String, String> train, String stationName) {
        String url = STATION_INFO_URL + urlEncode(stationName, "转码出错了!stationName:") + "&format=json";
        String ticketInfo = HttpClientUtil.get(url);
        try {
            @SuppressWarnings("unchecked")
            Map<String, Object> ticketInfos = (Map<String, Object>) JSONObject
                    .parseObject(ticketInfo, java.util.Map.class).get("ticketInfo");
            if (ticketInfos == null) {
                // Response carries no "ticketInfo" entry: nothing to record.
                return;
            }
            for (String key : ticketInfos.keySet()) {
                // One key may hold several train numbers separated by '/'.
                String[] trains = key.split("/");
                for (int i = 0; i < trains.length; i++) {
                    // Never reset a train that is already known, otherwise its
                    // details would be requested again in every later round.
                    if (trains[i].length() > 0 && !train.containsKey(trains[i])) {
                        train.put(trains[i], UNUSED);
                    }
                }
            }
        } catch (Exception e) {
            log.warn("根据车站信息得到车次信息报错:" + e.getMessage(), e);
        }
    }

    /**
     * Extracts the text preceding the first {@code </a>} of a page fragment.
     *
     * @param str fragment starting right after an anchor's opening tag
     * @return the text before {@code </a>}, or {@code ""} if {@code str} is
     *         {@code null}, empty, or contains no {@code </a>}
     */
    private String getStation(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        int pos = str.indexOf("</a>");
        if (pos == -1) {
            return "";
        }
        return str.substring(0, pos);
    }

    /**
     * Requests the details of one train and collects the stations it mentions.
     * Failures are logged and otherwise ignored. Persisting the details is still
     * an open TODO.
     *
     * @param key train number; {@code null} or empty values are ignored
     * @param stationTmp receives the station names found in the details
     */
    public void getAndSaveTrainsDetails(String key, HashSet<String> stationTmp) {
        if (key == null || key.length() == 0) {
            return;
        }
        String url = TRAIN_INFO_URL + urlEncode(key, "转码出错了!key:") + "&date=" + QUERY_DATE + "&format=json";
        String trainScheduleBody = HttpClientUtil.getUtf8(url);
        try {
            @SuppressWarnings("unchecked")
            List<Object> ticketInfos = (List<Object>) JSONObject.parseObject(trainScheduleBody, Map.class)
                    .get("trainScheduleBody");
            if (ticketInfos == null) {
                // Response carries no "trainScheduleBody" entry: nothing to collect.
                return;
            }
            for (Object ticketInfo : ticketInfos) {
                if (ticketInfo == null) {
                    continue;
                }
                Object content = JSONObject.parseObject(ticketInfo.toString(), Map.class).get("content");
                if (!(content instanceof List)) {
                    continue;
                }
                List<?> list = (List<?>) content;
                // Index 1 is read below, so at least two elements are required.
                if (list.size() < 2) {
                    continue;
                }
                // Element 1 is treated as a station name by the crawl loop.
                Object stationName = list.get(1);
                if (stationName instanceof String && ((String) stationName).length() > 0) {
                    stationTmp.add((String) stationName);
                }
                // TODO: take the details and persist them.
            }
        } catch (Exception e) {
            log.warn("根据车次获取车次详情报错:" + e.getMessage(), e);
        }
    }
}
相关推荐
更新发布
功能测试和接口测试的区别
2023/3/23 14:23:39如何写好测试用例文档
2023/3/22 16:17:39常用的选择回归测试的方式有哪些?
2022/6/14 16:14:27测试流程中需要重点把关几个过程?
2021/10/18 15:37:44性能测试的七种方法
2021/9/17 15:19:29全链路压测优化思路
2021/9/14 15:42:25性能测试流程浅谈
2021/5/28 17:25:47常见的APP性能测试指标
2021/5/8 17:01:11