Java NIO 写大文件比较
写2G文件,分批次写入,每批次写入128MB;
分别在Win7系统(3G内存,双核,32位,T系列处理器)和MacOS系统(8G内存,四核,64位,i7系列处理器)下运行测试。理论上跟硬盘类型和配置也有关系,这里不再贴出了。
测试代码
package rwbigfile;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.channels.ReadableByteChannel;
import java.security.AccessController;
import java.security.PrivilegedAction;
import util.StopWatch;
/**
 * Benchmark that writes a large file with three different NIO techniques:
 * {@link FileChannel#write(ByteBuffer)}, {@link FileChannel#transferFrom} and
 * {@link MappedByteBuffer}. Every variant writes {@code LEN} zero bytes in
 * batches of at most {@code DATA_CHUNK} bytes to a hard-coded path under
 * {@code e:/test}.
 *
 * @author Will
 */
public class WriteBigFileComparison {
    // Size of the data chunk written per batch (128 MB).
    private static final int DATA_CHUNK = 128 * 1024 * 1024;
    // Total amount of data written to each file (2 GB).
    private static final long LEN = 2L * 1024 * 1024 * 1024L;

    /**
     * Writes the test data with {@link FileChannel#write(ByteBuffer)}, forcing
     * the channel to disk after every chunk.
     *
     * @throws IOException if the target file cannot be replaced, opened or written
     */
    public static void writeWithFileChannel() throws IOException {
        RandomAccessFile raf = openFresh("e:/test/fc.dat");
        try {
            FileChannel fileChannel = raf.getChannel();
            ByteBuffer buf = ByteBuffer.allocate(DATA_CHUNK);
            long remaining = LEN;
            while (remaining > 0) {
                int chunkSize = announceChunk(remaining);
                buf.clear(); // clear for re-write
                buf.put(new byte[chunkSize]); // bulk put instead of one call per byte
                buf.flip(); // switch the buffer from writing mode to reading mode
                // write() may consume only part of the buffer, so drain it completely.
                while (buf.hasRemaining()) {
                    fileChannel.write(buf);
                }
                fileChannel.force(true);
                remaining -= chunkSize;
            }
        } finally {
            raf.close(); // closing the file also closes its channel
        }
    }

    /**
     * Writes the test data through memory-mapped regions, one mapping per chunk.
     * Each mapping is passed to {@link #unmap(MappedByteBuffer)} as soon as its
     * chunk has been written, so at most one chunk is mapped at a time.
     *
     * @throws IOException if the target file cannot be replaced, opened or mapped
     */
    public static void writeWithMappedByteBuffer() throws IOException {
        RandomAccessFile raf = openFresh("e:/test/mb.dat");
        try {
            FileChannel fileChannel = raf.getChannel();
            // long, not int: the offset reaches 2^31 once 2 GB have been written.
            long pos = 0;
            long remaining = LEN;
            while (remaining > 0) {
                int chunkSize = announceChunk(remaining);
                MappedByteBuffer mbb = fileChannel.map(MapMode.READ_WRITE, pos, chunkSize);
                try {
                    mbb.put(new byte[chunkSize]);
                } finally {
                    unmap(mbb); // release this chunk's mapping before mapping the next one
                }
                pos += chunkSize;
                remaining -= chunkSize;
            }
        } finally {
            raf.close(); // closing the file also closes its channel
        }
    }

    /**
     * Writes the test data by wrapping each chunk in a channel and copying it
     * into the file. Despite the method name, the copy is done with
     * {@link FileChannel#transferFrom}.
     *
     * @throws IOException if the target file cannot be replaced, opened or
     *                     written, or if a chunk cannot be transferred completely
     */
    public static void writeWithTransferTo() throws IOException {
        RandomAccessFile raf = openFresh("e:/test/transfer.dat");
        try {
            FileChannel toFileChannel = raf.getChannel();
            long position = 0;
            long remaining = LEN;
            while (remaining > 0) {
                int chunkSize = announceChunk(remaining);
                ReadableByteChannel fromByteChannel =
                        Channels.newChannel(new ByteArrayInputStream(new byte[chunkSize]));
                try {
                    // transferFrom() may move fewer bytes than requested; repeat until done.
                    long transferred = 0;
                    while (transferred < chunkSize) {
                        long n = toFileChannel.transferFrom(
                                fromByteChannel, position + transferred, chunkSize - transferred);
                        if (n <= 0) {
                            throw new IOException("transferred only " + transferred
                                    + " of " + chunkSize + " bytes at position " + position);
                        }
                        transferred += n;
                    }
                } finally {
                    fromByteChannel.close();
                }
                position += chunkSize;
                remaining -= chunkSize;
            }
        } finally {
            raf.close(); // closing the file also closes its channel
        }
    }

    /**
     * Forces the buffer's content to storage and then tries to release the
     * mapping immediately instead of waiting for garbage collection.
     *
     * <p>Reading from a MappedByteBuffer after it has been released crashes the
     * JVM. This easily happens under concurrency, when one thread starts reading
     * while another one is releasing. For stability, callers should therefore
     * make sure no other thread is still reading or writing the buffer before
     * releasing it.
     *
     * <p>Failures are reported on the console and never thrown.
     *
     * @param mappedByteBuffer the mapping to release; {@code null} is ignored
     */
    public static void unmap(final MappedByteBuffer mappedByteBuffer) {
        if (mappedByteBuffer == null) {
            return;
        }
        try {
            mappedByteBuffer.force();
            Boolean released = AccessController.doPrivileged(new PrivilegedAction<Boolean>() {
                @Override
                public Boolean run() {
                    return Boolean.valueOf(releaseWithUnsafe(mappedByteBuffer)
                            || releaseWithCleaner(mappedByteBuffer));
                }
            });
            // Only report success when one of the strategies actually ran.
            if (released.booleanValue()) {
                System.out.println("clean MappedByteBuffer completed");
            } else {
                System.err.println("clean MappedByteBuffer failed; mapping is released on GC");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Releases the buffer through {@code sun.misc.Unsafe.invokeCleaner(ByteBuffer)},
     * looked up reflectively so this class has no compile-time dependency on it.
     *
     * @return {@code true} if the cleaner was invoked
     */
    private static boolean releaseWithUnsafe(MappedByteBuffer buffer) {
        try {
            Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
            java.lang.reflect.Field theUnsafe = unsafeClass.getDeclaredField("theUnsafe");
            theUnsafe.setAccessible(true);
            Method invokeCleaner = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
            invokeCleaner.invoke(theUnsafe.get(null), buffer);
            return true;
        } catch (Exception ignored) {
            // invokeCleaner is missing on older runtimes; the caller falls back
            // to the buffer's own cleaner() method.
            return false;
        }
    }

    /**
     * Releases the buffer by reflectively calling its {@code cleaner()} method
     * and then {@code clean()} on the returned object.
     *
     * @return {@code true} if the cleaner was invoked
     */
    private static boolean releaseWithCleaner(MappedByteBuffer buffer) {
        try {
            Method cleanerMethod = buffer.getClass().getMethod("cleaner");
            cleanerMethod.setAccessible(true);
            Object cleaner = cleanerMethod.invoke(buffer);
            if (cleaner == null) {
                return false;
            }
            Method cleanMethod = cleaner.getClass().getMethod("clean");
            cleanMethod.setAccessible(true);
            cleanMethod.invoke(cleaner);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Deletes any existing file at {@code path} and opens a new one for
     * reading and writing.
     *
     * @throws IOException if an existing file cannot be deleted or the new
     *                     file cannot be opened
     */
    private static RandomAccessFile openFresh(String path) throws IOException {
        File file = new File(path);
        if (file.exists() && !file.delete()) {
            throw new IOException("cannot delete existing file " + file.getPath());
        }
        return new RandomAccessFile(file, "rw");
    }

    /**
     * Computes the size of the next chunk (at most DATA_CHUNK) and prints the
     * progress line for it.
     */
    private static int announceChunk(long remaining) {
        int chunkSize = (int) Math.min((long) DATA_CHUNK, remaining);
        if (chunkSize == DATA_CHUNK) {
            System.out.println("write a data chunk: " + (DATA_CHUNK / (1024 * 1024)) + "MB");
        } else {
            System.out.println("write rest data chunk: " + chunkSize + "B");
        }
        return chunkSize;
    }

    /**
     * Runs the three write variants one after another, wrapping each in a
     * StopWatch start/stop pair.
     *
     * @param args unused
     * @throws IOException if any of the variants fails
     */
    public static void main(String[] args) throws IOException {
        StopWatch sw = new StopWatch();
        sw.startWithTaskName("write with file channel's write(ByteBuffer)");
        writeWithFileChannel();
        sw.stopAndPrint();
        sw.startWithTaskName("write with file channel's transferTo");
        writeWithTransferTo();
        sw.stopAndPrint();
        sw.startWithTaskName("write with MappedByteBuffer");
        writeWithMappedByteBuffer();
        sw.stopAndPrint();
    }
}
测试结果(Y轴是耗时秒数)
显然writeWithMappedByteBuffer方式性能最好,且在硬件配置较高情况下优势越加明显
在硬件配置较低情况下,writeWithTransferTo比writeWithFileChannel性能稍好
在硬件配置较高情况下,writeWithTransferTo和writeWithFileChannel的性能基本持平
此外,注意writeWithMappedByteBuffer方式除了占用JVM堆内存外,还要占用额外的native内存(Direct Byte Buffer内存)
内存映射文件使用经验
MappedByteBuffer需要占用“双倍”的内存(对象JVM堆内存和Direct Byte Buffer内存),可以通过-XX:MaxDirectMemorySize参数设置后者最大大小
不要频繁调用MappedByteBuffer的force()方法,因为这个方法会强制OS刷新内存中的数据到磁盘,从而只能获得些微的性能提升(相比IO方式),可以用后面的代码实例进行定时、定量刷新
如果突然断电或者服务器突然Down,内存映射文件数据可能还没有写入磁盘,这时就会丢失一些数据。为了降低这种风险,避免用MappedByteBuffer写超大文件,可以把大文件分割成几个小文件,但不能太小(否则将失去性能优势)
ByteBuffer的rewind()方法将position属性设回为0,因此可以重新读取buffer中的数据;limit属性保持不变,因此可读取的字节数不变
ByteBuffer的flip()方法将一个Buffer由写模式切换到读模式
ByteBuffer的clear()和compact()可以在我们读完ByteBuffer中的数据后重新切回写模式。不同的是clear()会将position设置为0,limit设为capacity,换句话说Buffer被清空了,但Buffer内的数据并没有被清空。如果Buffer中还有未被读取的数据,那调用clear()之后,这些数据会被“遗忘”,再写入就会覆盖这些未读数据。而调用compact()之后,这些未被读取的数据仍然可以保留,因为它将所有还未被读取的数据拷贝到Buffer的左端,然后设置position为紧随未读数据之后,limit被设置为capacity,未读数据不会被覆盖
定时、定量刷新内存映射文件到磁盘
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
/**
 * Append-only writer backed by a memory-mapped file of fixed size
 * ({@code MAX_FILE_SIZE} bytes). Appended data is forced to disk once the
 * amount of unflushed data or the time since the last flush exceeds a
 * threshold.
 *
 * <p>{@link #boundChannelToByteBuffer()}, {@link #appendData(byte[])} and
 * {@link #flush()} are synchronized on this instance; the getters are not.
 */
public class MappedFile {
    // Name of the backing file.
    private String fileName;
    // Path of the directory that contains the backing file.
    private String fileDirPath;
    // The backing file (fileDirPath + "/" + fileName).
    private File file;
    // Mapping of the first MAX_FILE_SIZE bytes of the file; null until bound.
    private MappedByteBuffer mappedByteBuffer;
    // Whether the most recent bind attempt succeeded.
    private boolean boundSuccess = false;
    // The file can hold at most 50 MB.
    private final static long MAX_FILE_SIZE = 1024 * 1024 * 50;
    // Maximum amount of unflushed data (512 KB); above it a flush is forced.
    private final long MAX_FLUSH_DATA_SIZE = 1024 * 512;
    // Maximum interval between flushes in milliseconds; above it a flush is forced.
    private final long MAX_FLUSH_TIME_GAP = 1000;
    // Position at which the next append is written.
    private long writePosition = 0;
    // Time of the last flush (milliseconds since the epoch, 0 if never flushed).
    private long lastFlushTime;
    // Value of writePosition at the last flush.
    private long lastFlushFilePosition = 0;

    /**
     * Creates the handle and, if the file does not exist yet, tries to create
     * it. A creation failure is printed and otherwise ignored.
     *
     * @param fileName    name of the backing file
     * @param fileDirPath directory that contains the backing file
     */
    public MappedFile(String fileName, String fileDirPath) {
        this.fileName = fileName;
        this.fileDirPath = fileDirPath;
        this.file = new File(fileDirPath + "/" + fileName);
        if (!file.exists()) {
            try {
                file.createNewFile();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Maps the first {@code MAX_FILE_SIZE} bytes of the file read/write.
     * The file is closed again right after mapping; an established mapping
     * does not depend on the channel that created it. On success the buffer
     * is positioned at the current write position, so binding again does not
     * overwrite data appended earlier.
     *
     * @return {@code true} if the mapping was established, {@code false} if
     *         opening or mapping the file failed (the error is printed)
     */
    public synchronized boolean boundChannelToByteBuffer() {
        this.boundSuccess = false;
        RandomAccessFile raf = null;
        try {
            raf = new RandomAccessFile(file, "rw");
            MappedByteBuffer buffer = raf.getChannel()
                    .map(FileChannel.MapMode.READ_WRITE, 0, MAX_FILE_SIZE);
            buffer.position((int) writePosition);
            this.mappedByteBuffer = buffer;
            this.boundSuccess = true;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (raf != null) {
                try {
                    raf.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return this.boundSuccess;
    }

    /**
     * Intended to replace the file content with {@code data} (delete the
     * previous file, then write again). Not implemented.
     *
     * @param data the data to write
     * @return always {@code false}
     */
    public synchronized boolean writeData(byte[] data) {
        return false;
    }

    /**
     * Appends data at the current write position, binding the file first if
     * necessary.
     *
     * @param data the bytes to append
     * @return {@code true} if the data was written; {@code false} if it does
     *         not fit into the remaining space, in which case nothing is
     *         written and pending data is flushed
     * @throws Exception if the file cannot be mapped
     */
    public synchronized boolean appendData(byte[] data) throws Exception {
        if (!boundSuccess && !boundChannelToByteBuffer()) {
            throw new IOException("cannot map file " + file.getAbsolutePath());
        }
        // Compute the new position without committing it, so a rejected
        // append leaves writePosition and the flush bookkeeping untouched.
        long newPosition = writePosition + data.length;
        if (newPosition > MAX_FILE_SIZE) { // data would exceed the size limit: do not write
            flush();
            System.out.println("File="
                    + file.toURI().toString()
                    + " is written full.");
            System.out.println("already write data length:"
                    + writePosition
                    + ", max file size=" + MAX_FILE_SIZE);
            return false;
        }
        this.mappedByteBuffer.put(data);
        writePosition = newPosition;
        // Check whether the in-memory data has to be flushed to disk.
        boolean tooMuchDirtyData =
                writePosition - lastFlushFilePosition > this.MAX_FLUSH_DATA_SIZE;
        boolean tooLongSinceFlush =
                System.currentTimeMillis() - lastFlushTime > this.MAX_FLUSH_TIME_GAP
                        && writePosition > lastFlushFilePosition;
        if (tooMuchDirtyData || tooLongSinceFlush) {
            flush();
        }
        return true;
    }

    /**
     * Forces the mapped content to disk and records the time and write
     * position of this flush. Does nothing if the file has not been mapped yet.
     */
    public synchronized void flush() {
        if (this.mappedByteBuffer == null) {
            return;
        }
        this.mappedByteBuffer.force();
        this.lastFlushTime = System.currentTimeMillis();
        this.lastFlushFilePosition = writePosition;
    }

    /** @return time of the last flush in milliseconds, 0 if never flushed */
    public long getLastFlushTime() {
        return lastFlushTime;
    }

    /** @return name of the backing file */
    public String getFileName() {
        return fileName;
    }

    /** @return directory that contains the backing file */
    public String getFileDirPath() {
        return fileDirPath;
    }

    /**
     * The method name is misspelled but kept for existing callers.
     *
     * @return whether the most recent bind attempt succeeded
     */
    public boolean isBundSuccess() {
        return boundSuccess;
    }

    /** @return the backing file */
    public File getFile() {
        return file;
    }

    /** @return the fixed size of the mapped region in bytes */
    public static long getMaxFileSize() {
        return MAX_FILE_SIZE;
    }

    /** @return position at which the next append is written */
    public long getWritePosition() {
        return writePosition;
    }

    /** @return write position recorded at the last flush */
    public long getLastFlushFilePosition() {
        return lastFlushFilePosition;
    }

    /** @return unflushed-data threshold in bytes */
    public long getMAX_FLUSH_DATA_SIZE() {
        return MAX_FLUSH_DATA_SIZE;
    }

    /** @return flush-interval threshold in milliseconds */
    public long getMAX_FLUSH_TIME_GAP() {
        return MAX_FLUSH_TIME_GAP;
    }
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。