Java网页抓取
package com.changying.spider;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;

/**
 * Minimal page fetcher: reads a URL from the first line of a config file and
 * saves the bytes served at that URL to {@code url001.html} inside an output
 * directory.
 */
public class Spider {

    /** Name of the file the downloaded content is written to. */
    private static final String RESULT_FILE_NAME = "url001.html";

    /** Size in bytes of the buffer used while copying the response. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path of the config file whose first
     *             line holds the URL to fetch; {@code args[1]} is the directory
     *             that receives {@code url001.html}
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException if the config file cannot be read or contains no URL,
     *                     the URL is malformed or cannot be opened, or the
     *                     result file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: Spider <config-file> <output-directory>");
        }
        System.out.println(args[0]);
        System.out.println(args[1]);

        String strUrl = readFirstLine(new File(args[0]));
        if (strUrl == null || strUrl.trim().isEmpty()) {
            throw new IOException("No URL found on the first line of " + args[0]);
        }
        System.out.println(strUrl);

        // Let File join directory and name so the platform's own separator is
        // used instead of a hard-coded backslash.
        File resultFile = new File(args[1], RESULT_FILE_NAME);
        download(new URL(strUrl), resultFile);
    }

    /**
     * Returns the first line of {@code file}, or {@code null} if the file is empty.
     * The reader is closed even when reading fails.
     */
    private static String readFirstLine(File file) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            return reader.readLine();
        }
    }

    /**
     * Copies everything served at {@code url} into {@code target}, creating or
     * truncating the target file. Both streams are closed even when the copy fails.
     */
    private static void download(URL url, File target) throws IOException {
        // The URL stream is opened first, so a failed connection does not leave
        // an empty result file behind.
        try (InputStream in = url.openStream();
                OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        }
    }
}
郑重声明：本站内容如果来自互联网及其他传播媒体，其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享，并不代表本站赞同其观点和对其真实性负责，也不构成任何其他建议。