网页主动探测工具使用
网页主动探测工具使用
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Entry point of a three-stage page probe: connect workers download pages,
 * a parse worker extracts ".htm" links and feeds them back to the connect
 * queue, and a persistence worker stores one row per processed task.
 * The main thread prints throughput statistics once per second.
 */
public class Probe {

    /** Number of connect workers; equals the pool size because a worker never returns. */
    private static final int CONNECT_WORKERS = 200;

    private static final BlockingQueue<Task> CONNECTLIST = new LinkedBlockingQueue<Task>();
    private static final BlockingQueue<Task> PARSELIST = new LinkedBlockingQueue<Task>();
    private static final BlockingQueue<Task> PERSISTENCELIST = new LinkedBlockingQueue<Task>();

    private static final ExecutorService CONNECTTHREADPOOL = Executors.newFixedThreadPool(CONNECT_WORKERS);
    private static final ExecutorService PARSETHREADPOOL = Executors.newSingleThreadExecutor();
    private static final ExecutorService PERSISTENCETHREADPOOL = Executors.newFixedThreadPool(1);

    /** Hosts for which absolute "http://host/..." links are followed. */
    private static final List<String> DOMAINLIST = new CopyOnWriteArrayList<>();

    static {
        DOMAINLIST.add("域名");
    }

    /**
     * Seeds the connect queue, starts all workers and then reports progress forever.
     *
     * @param args unused
     * @throws Exception if the main thread is interrupted while queueing or sleeping
     */
    public static void main(String args[]) throws Exception {
        long start = System.currentTimeMillis();
        CONNECTLIST.put(new Task("域名", 80, "/static/index.html"));

        // Each handler loops forever, so submitting more handlers than threads
        // only parks the surplus in the executor queue where they never run.
        for (int i = 0; i < CONNECT_WORKERS; i++) {
            CONNECTTHREADPOOL.submit(new ConnectHandler(CONNECTLIST, PARSELIST));
        }
        PARSETHREADPOOL.submit(new ParseHandler(CONNECTLIST, PARSELIST, PERSISTENCELIST, DOMAINLIST));
        PERSISTENCETHREADPOOL.submit(new PersistenceHandler(PERSISTENCELIST));

        while (true) {
            Thread.sleep(1000);
            // Floating-point seconds; clamped so the divisions below are never by zero.
            float interval = Math.max(1L, System.currentTimeMillis() - start) / 1000f;
            int connectTotal = ConnectHandler.GETCOUNT();
            int parseTotal = ParseHandler.GETCOUNT();
            int persistenceTotal = PersistenceHandler.GETCOUNT();
            int connectps = Math.round(connectTotal / interval);
            int parseps = Math.round(parseTotal / interval);
            int persistenceps = Math.round(persistenceTotal / interval);
            System.out.print("\r连接总数:" + connectTotal + " \t每秒连接:" + connectps + "\t连接队列剩余:"
                    + CONNECTLIST.size() + " \t解析总数:" + parseTotal + " \t每秒解析:" + parseps + "\t解析队列剩余:"
                    + PARSELIST.size() + " \t持久化总数:" + persistenceTotal + " \t每秒持久化:" + persistenceps
                    + "\t持久化队列剩余:" + PERSISTENCELIST.size());
        }
    }
}

/**
 * One page to probe, together with the results gathered while it moves
 * through the connect, parse and persistence stages.
 */
class Task {

    private String host;
    private int port;
    private String currentPath;
    private long taskTime;
    private String type;
    private String content;
    private int state;

    /** Creates an empty task; call {@link #init(String, int, String)} before use. */
    public Task() {
    }

    /**
     * Creates a task for the given target.
     *
     * @param host target host name
     * @param port target TCP port
     * @param path request path, optionally followed by a query string
     */
    public Task(String host, int port, String path) {
        init(host, port, path);
    }

    /**
     * Sets the target of this task and derives {@link #getType()} from the path.
     *
     * @param host target host name
     * @param port target TCP port
     * @param path request path, optionally followed by a query string
     */
    public void init(String host, int port, String path) {
        this.setCurrentPath(path);
        this.host = host;
        this.port = port;
    }

    public int getState() {
        return state;
    }

    public void setState(int state) {
        this.state = state;
    }

    public String getCurrentPath() {
        return currentPath;
    }

    /**
     * Stores the request path and sets the type to the file extension of its
     * last segment (text after the last '.', query string excluded). The type
     * is the empty string when the last segment has no extension.
     *
     * @param currentPath request path, optionally followed by a query string
     */
    public void setCurrentPath(String currentPath) {
        this.currentPath = currentPath;
        this.type = extensionOf(currentPath);
    }

    /** Returns the extension of the last path segment, ignoring any query string. */
    private static String extensionOf(String path) {
        int queryAt = path.indexOf('?');
        String withoutQuery = queryAt >= 0 ? path.substring(0, queryAt) : path;
        int lastSlash = withoutQuery.lastIndexOf('/');
        int lastDot = withoutQuery.lastIndexOf('.');
        // A dot that belongs to a directory name ("/v1.2/page") is not an extension.
        return lastDot > lastSlash ? withoutQuery.substring(lastDot + 1) : "";
    }

    public long getTaskTime() {
        return taskTime;
    }

    public void setTaskTime(long taskTime) {
        this.taskTime = taskTime;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getHost() {
        return host;
    }

    public int getPort() {
        return port;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }
}

/**
 * Parse stage: reads the HTTP status of a downloaded task, extracts quoted
 * ".htm" links from successful responses, queues every new link for
 * download and hands the finished task to the persistence stage.
 */
class ParseHandler implements Runnable {

    /** Keys ("host:port/path") of every URL already seen by the parse stage. */
    private static final Set<String> SET = new ConcurrentSkipListSet<String>();
    private static final AtomicInteger COUNT = new AtomicInteger();

    /** A double-quoted token that contains ".htm". */
    private static final Pattern LINK_PATTERN = Pattern.compile("\"[^\"]+\\.htm[^\"]*\"");
    /** An HTTP status line; group 1 is the three-digit status code. */
    private static final Pattern STATUS_PATTERN = Pattern.compile("HTTP/\\d\\.\\d\\s+(\\d{3})");
    /** The status line is only looked for near the start of the response. */
    private static final int STATUS_SEARCH_LIMIT = 32;
    private static final String HTTP_SCHEME = "http://";

    private BlockingQueue<Task> connectlist;
    private BlockingQueue<Task> parselist;
    private BlockingQueue<Task> persistencelist;
    List<String> domainlist;

    /** @return number of parse iterations performed so far */
    public static int GETCOUNT() {
        return COUNT.get();
    }

    /**
     * @param connectlist     queue that receives newly discovered tasks
     * @param parselist       queue of downloaded tasks to parse
     * @param persistencelist queue that receives parsed tasks
     * @param domainlist      hosts for which absolute "http://" links are followed
     */
    public ParseHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist,
            BlockingQueue<Task> persistencelist, List<String> domainlist) {
        this.connectlist = connectlist;
        this.parselist = parselist;
        this.persistencelist = persistencelist;
        this.domainlist = domainlist;
    }

    /** Processes one task from the parse queue. */
    private void handler() {
        try {
            Task task = parselist.take();
            // Remember the page itself so a link back to it is not downloaded again.
            SET.add(keyOf(task.getHost(), task.getPort(), task.getCurrentPath()));

            String content = task.getContent();
            task.setState(parseStatus(content));
            if (200 == task.getState()) {
                Matcher matcher = LINK_PATTERN.matcher(content);
                while (matcher.find()) {
                    String quoted = matcher.group();
                    String link = quoted.substring(1, quoted.length() - 1);
                    if (isCandidate(link)) {
                        createNewTask(task, link);
                    }
                }
            }
            task.setContent(null); // the body is no longer needed once links are extracted
            persistencelist.put(task);
        } catch (InterruptedException e) {
            // Restore the flag so run() can stop the worker.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Decides whether an extracted token should be followed. Tokens with
     * blanks, tabs or parentheses are rejected. A colon is only accepted as
     * part of a leading "http://", which lets whitelisted absolute links
     * through while still rejecting other schemes.
     */
    private static boolean isCandidate(String link) {
        if (link.contains(" ") || link.contains("\t") || link.contains("(") || link.contains(")")) {
            return false;
        }
        return !link.contains(":") || link.startsWith(HTTP_SCHEME);
    }

    /**
     * Extracts the HTTP status code from the beginning of a raw response.
     *
     * @param content raw response text, may be {@code null}
     * @return the three-digit status code, or 0 when no status line is found
     *         within the first {@value #STATUS_SEARCH_LIMIT} characters
     */
    static int parseStatus(String content) {
        if (content == null) {
            return 0;
        }
        Matcher matcher = STATUS_PATTERN.matcher(content);
        matcher.region(0, Math.min(content.length(), STATUS_SEARCH_LIMIT));
        return matcher.find() ? Integer.parseInt(matcher.group(1)) : 0;
    }

    /**
     * Queues a download task for a link found on {@code fatherTask}, unless
     * the link points to a host that is not whitelisted or its resolved URL
     * was already seen.
     *
     * @param fatherTask task whose content contained the link
     * @param path       link text without the surrounding quotes
     * @throws InterruptedException if interrupted while queueing the task
     */
    private void createNewTask(Task fatherTask, String path) throws InterruptedException {
        Task newTask = buildTask(fatherTask, path);
        if (newTask == null) {
            return;
        }
        String key = keyOf(newTask.getHost(), newTask.getPort(), newTask.getCurrentPath());
        // add() is the atomic "check and remember" step; false means already seen.
        if (SET.add(key)) {
            connectlist.put(newTask);
        }
    }

    /** Turns a link into a task, or returns null for an absolute link to a foreign host. */
    private Task buildTask(Task fatherTask, String path) {
        if (path.startsWith(HTTP_SCHEME)) {
            for (String domain : domainlist) {
                String origin = HTTP_SCHEME + domain + "/";
                if (path.startsWith(origin)) {
                    // Keep the slash that terminates the origin as the start of the path.
                    String absolutePath = path.substring(origin.length() - 1);
                    return new Task(domain, fatherTask.getPort(), resolvePath("/", absolutePath));
                }
            }
            return null;
        }
        return new Task(fatherTask.getHost(), fatherTask.getPort(),
                resolvePath(fatherTask.getCurrentPath(), path));
    }

    /** Builds the de-duplication key of a URL. */
    private static String keyOf(String host, int port, String path) {
        return host + ":" + port + path;
    }

    /**
     * Resolves a link against the path of the page it was found on.
     * A link starting with "/" replaces the base path; any other link is
     * appended to the directory of the base path. "." segments are dropped
     * and ".." segments remove the preceding segment without ever climbing
     * above the root. The query string of the link is kept; the query string
     * of the base path is ignored.
     *
     * @param basePath path of the referring page, e.g. "/a/b/index.html"
     * @param link     link text, e.g. "../x.html"
     * @return an absolute path that starts with "/"
     */
    static String resolvePath(String basePath, String link) {
        int queryAt = link.indexOf('?');
        String linkPath = queryAt >= 0 ? link.substring(0, queryAt) : link;
        String query = queryAt >= 0 ? link.substring(queryAt) : "";

        Deque<String> segments = new ArrayDeque<String>();
        if (!linkPath.startsWith("/")) {
            int baseQueryAt = basePath.indexOf('?');
            String base = baseQueryAt >= 0 ? basePath.substring(0, baseQueryAt) : basePath;
            int lastSlash = base.lastIndexOf('/');
            if (lastSlash > 0) {
                pushSegments(segments, base.substring(0, lastSlash));
            }
        }
        pushSegments(segments, linkPath);

        StringBuilder resolved = new StringBuilder();
        for (String segment : segments) {
            resolved.append('/').append(segment);
        }
        if (resolved.length() == 0) {
            resolved.append('/');
        }
        return resolved.append(query).toString();
    }

    /** Applies the segments of {@code path} to the segment stack. */
    private static void pushSegments(Deque<String> segments, String path) {
        for (String segment : path.split("/")) {
            if (segment.isEmpty() || ".".equals(segment)) {
                continue;
            }
            if ("..".equals(segment)) {
                segments.pollLast(); // no-op at the root
            } else {
                segments.addLast(segment);
            }
        }
    }

    /** Parses tasks until the worker thread is interrupted. */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            this.handler();
            COUNT.addAndGet(1);
        }
    }
}

/**
 * Connect stage: downloads the page of each queued task over a plain socket
 * and forwards the task, with the raw response attached, to the parse stage.
 */
class ConnectHandler implements Runnable {

    private static final AtomicInteger COUNT = new AtomicInteger();
    /** Upper bound for establishing the TCP connection. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;
    /** Upper bound for a single blocking read, so a silent server cannot pin a worker. */
    private static final int READ_TIMEOUT_MILLIS = 10000;

    private BlockingQueue<Task> connectlist;
    private BlockingQueue<Task> parselist;

    /** @return number of connect iterations performed so far */
    public static int GETCOUNT() {
        return COUNT.get();
    }

    /**
     * @param connectlist queue of tasks to download
     * @param parselist   queue that receives downloaded tasks
     */
    public ConnectHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist) {
        this.connectlist = connectlist;
        this.parselist = parselist;
    }

    /** Downloads one task; a task whose download fails is reported and not forwarded. */
    private void handler() {
        try {
            Task task = connectlist.take();
            long start = System.currentTimeMillis();
            getHtml(task);
            long end = System.currentTimeMillis();
            task.setTaskTime(end - start);
            parselist.put(task);
        } catch (InterruptedException e) {
            // Restore the flag so run() can stop the worker.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Sends an HTTP/1.0 GET for the task and stores the complete response
     * (status line, headers and body) as the task content. Response lines are
     * concatenated without line terminators.
     *
     * @param task task to download
     * @throws Exception if the host cannot be resolved or the exchange fails
     */
    private void getHtml(Task task) throws Exception {
        StringBuilder sb = new StringBuilder(2048);
        InetAddress addr = InetAddress.getByName(task.getHost());
        // Closing the socket also closes both of its streams, on success and on failure.
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress(addr, task.getPort()), CONNECT_TIMEOUT_MILLIS);
            socket.setSoTimeout(READ_TIMEOUT_MILLIS);

            // Send the request: a request line plus the minimal headers.
            BufferedWriter wr = new BufferedWriter(
                    new OutputStreamWriter(socket.getOutputStream(), StandardCharsets.UTF_8));
            wr.write("GET " + task.getCurrentPath() + " HTTP/1.0\r\n");
            wr.write("HOST:" + task.getHost() + "\r\n");
            wr.write("Accept:*/*\r\n");
            wr.write("\r\n");
            wr.flush();

            // Read the response until the server closes the connection.
            BufferedReader rd = new BufferedReader(
                    new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8));
            String line;
            while ((line = rd.readLine()) != null) {
                sb.append(line);
            }
        }
        task.setContent(sb.toString());
    }

    /** Downloads tasks until the worker thread is interrupted. */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            this.handler();
            COUNT.addAndGet(1);
        }
    }
}

/**
 * Persistence stage: inserts one row into the "probe" table for every task
 * taken from the persistence queue.
 */
class PersistenceHandler implements Runnable {

    // The Oracle thin driver requires '@' in front of the host part of the URL.
    private static final String JDBC_URL = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
    // NOTE(review): credentials are hard-coded; consider moving them to configuration.
    private static final String JDBC_USER = "edmond";
    private static final String JDBC_PASSWORD = "edmond";
    private static final String INSERT_SQL =
            "insert into probe(id,host,path,state,tasktime,type) values(seq_probe_id.nextval,?,?,?,?,?)";

    private static final AtomicInteger COUNT = new AtomicInteger();

    static {
        try {
            Class.forName("oracle.jdbc.OracleDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    private BlockingQueue<Task> persistencelist;
    private Connection conn;
    private PreparedStatement ps;

    /** @return number of persistence iterations performed so far */
    public static int GETCOUNT() {
        return COUNT.get();
    }

    /**
     * Opens the database connection and prepares the insert statement.
     * A failure is reported on standard error; {@link #run()} then exits
     * immediately instead of processing tasks.
     *
     * @param persistencelist queue of tasks to store
     */
    public PersistenceHandler(BlockingQueue<Task> persistencelist) {
        this.persistencelist = persistencelist;
        try {
            conn = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
            // handler() commits explicitly, which requires auto-commit to be off.
            conn.setAutoCommit(false);
            ps = conn.prepareStatement(INSERT_SQL);
        } catch (SQLException e) {
            e.printStackTrace();
            closeQuietly();
        }
    }

    /** Releases a half-initialised connection after a failed start-up. */
    private void closeQuietly() {
        ps = null;
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException ignored) {
                // Nothing more can be done for a connection that is being discarded.
            }
            conn = null;
        }
    }

    /** Stores tasks until the worker thread is interrupted. */
    @Override
    public void run() {
        if (ps == null) {
            System.err.println("PersistenceHandler: no database connection, persistence worker stopped");
            return;
        }
        while (!Thread.currentThread().isInterrupted()) {
            this.handler();
            COUNT.addAndGet(1);
        }
    }

    /** Inserts and commits one task. */
    private void handler() {
        try {
            Task task = persistencelist.take();
            ps.setString(1, task.getHost());
            ps.setString(2, task.getCurrentPath());
            ps.setInt(3, task.getState());
            ps.setLong(4, task.getTaskTime());
            ps.setString(5, task.getType());
            ps.executeUpdate();
            conn.commit();
        } catch (InterruptedException e) {
            // Restore the flag so run() can stop the worker.
            Thread.currentThread().interrupt();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。