// 一个通过 HttpClient 抓取火车票信息的程序。代码需要稍作修改才能运行:
// 需要自行封装 HttpClient(即代码中的 HttpClientUtil)并以 GET 方式调用;
// 还需要引入 fastjson,用它解析接口返回的数据。
// 入口方法是 catchTrainInfo()。
  import java.io.UnsupportedEncodingException;
  import java.net.URLEncoder;
  import java.util.HashMap;
  import java.util.HashSet;
  import java.util.Iterator;
  import java.util.List;
  import java.util.Map;
  import java.util.Map.Entry;
  import java.util.Set;
  import javax.annotation.Resource;
  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;
  public class CatchTrainInfo {
  /**
  * 抓取火车票信息
  * @param attachmentId
  * @return
  */
  @SuppressWarnings("unchecked")
  public  String catchTrainInfo() {
  //车站信息
  HashSet<String> station = new HashSet<String>();
  //临时车站信息
  HashSet<String> stationTmp = new HashSet<String>();
  //车次信息
  Map<String,String> train = new HashMap<String,String>();
  String stationHtml = HttpClientUtil.get("http://www.59178.com/zhan/");
  String stationName = "";
  //截取车站信息
  String stations[] = stationHtml.split("htm'>");
  for (int i = 0; i < stations.length; i++) {
  if (i == 0 ) {
  continue;
  }
  stationName = getStation(stations[i]);
  if("".equals(stationName)) {
  continue;
  }
  station.add(stationName);
  }
  //循环,根据车站信息获取车次信息
  getTrainsByStation(station,train);
  //循环,根据车次获取车次详情,并保存入库
  getTrainDetailsByTrains(stationTmp,train);
  //继续执行3次循环,(本来应该stationTmp.size()==0的时候,防止无限循环)
  int loop = 3;
  while (loop > 0) {
  //stationTmp 和 station比较,stationTmp去掉已经遍历过的站点,station用stationTmp替代,继续循环
  HashSet<String> stationTmp1 = (HashSet<String>) stationTmp.clone();
  for (String stationTmpElement : stationTmp) {
  for (String stationElement : station) {
  if(stationTmpElement.equals(stationElement)) {
  stationTmp.remove(stationTmpElement);
  }
  }
  }
  station = stationTmp1;
  //循环,根据车站信息获取车次信息
  getTrainsByStation(stationTmp,train);
  //循环,根据车次获取车次详情,并写入数据库
  getTrainDetailsByTrains(stationTmp,train);
  loop--;
  }
  return "success!";
  }
  /**
  * 根据车次获取车次详情,并写入数据库
  * @param stationTmp
  * @param train
  */
  private  void getTrainDetailsByTrains(HashSet<String> stationTmp, Map<String, String> train) {
  Iterator<Entry<String, String>> iterator = train.entrySet().iterator();
  while (iterator.hasNext()) {
  Entry<String, String> entry = iterator.next();
  String key =  entry.getKey();
  String value =  entry.getValue();
  if("unused".equals(value)) {
  getAndSaveTrainsDetails(key,stationTmp);
  entry.setValue("used");
  }
  try {
  //休眠一会,防止反爬虫
  Thread.sleep(100);
  } catch (InterruptedException e) {
  e.printStackTrace();
  }
  }
  }
  /**
  * 根据车站信息获取车次信息
  * @param station
  * @param train
  */
  private  void getTrainsByStation(HashSet<String> station, Map<String, String> train) {
  Iterator<String> iterator = station.iterator();
  while (iterator.hasNext()) {
  String key = iterator.next();
  getTrainsInfo(train,key);
  try {
  //休眠一会,防止反爬虫
  Thread.sleep(100);
  } catch (InterruptedException e) {
  e.printStackTrace();
  }
  }
  }
  /**
  * 根据车站信息得到车次信息
  * @param train
  * @param stationName
  * @return
  */
  private  void getTrainsInfo(Map<String, String> train, String stationName) {
  try {
  stationName = URLEncoder.encode(stationName,"utf-8");
  } catch (UnsupportedEncodingException e1) {
  log.error("转码出错了!stationName:" + stationName);
  }
  String url = "http://train.qunar.com/qunar/stationInfo.jsp?q=" + stationName + "&format=json";
  //根据车站信息获取车次  ticketInfo
  String ticketInfo = HttpClientUtil.get(url);
  try{
  @SuppressWarnings("unchecked")
  java.util.Map<String, Object> ticketInfos = (Map<String, Object>) JSONObject.parseObject(ticketInfo, java.util.Map.class).get("ticketInfo");
  Set<Entry<String, Object>> entries = ticketInfos.entrySet( );
  if (entries != null) {
  Iterator<Entry<String, Object>> iterator = entries.iterator( );
  while (iterator.hasNext( )) {
  Entry<String, Object> entry = iterator.next();
  String key = (String) entry.getKey( );
  String trains[] = key.split("/");
  for(int i = 0; i< trains.length; i++) {
  train.put(trains[i], "unused");
  }
  }
  }
  } catch (Exception e) {
  log.info("根据车站信息得到车次信息报错:"+e.getMessage());
  }
  }
  /**
  * 解析得到车站信息
  * @param str
  * @return
  */
  private  String getStation(String str) {
  if (str == null || str.length() <= 0) {
  return "";
  }
  int pos = -1;
  pos = str.indexOf("</a>", pos + 1);
  if (pos == -1) {
  return "";
  }
  return str.substring(0, pos);
  }
  /**
  * 根据车次获取车次详情,并保存入库
  *
  * @param key
  * @param stationTmp
  */
  public void getAndSaveTrainsDetails(String key, HashSet<String> stationTmp) {
  String url = "http://train.qunar.com/qunar/checiInfo.jsp?q=" + key + "&date=20170107&format=json";
  String trainScheduleBody = HttpClientUtil.getUtf8(url);
  try {
  List<Object> ticketInfos = (List<Object>) JSONObject.parseObject(trainScheduleBody, Map.class)
  .get("trainScheduleBody");
  for (int i = 0; i < ticketInfos.size(); i++) {
  List<String> list = (List<String>) JSONObject.parseObject(ticketInfos.get(i).toString(), Map.class)
  .get("content");
  if (list == null || list.size() <= 0) {
  continue;
  }
  stationTmp.add(list.get(1));
  // 得到详情,更新入库
  //TODO 
  }
  } catch (Exception e) {
  log.info("根据车次获取车次详情报错:" + e.getMessage());
  }
  }
  }