Android 汉字转拼音之JNI篇

技术分享

 

package com.tool.hz2py;

import android.os.Bundle;
import android.app.Activity;
import android.view.Menu;
import android.widget.TextView;

/**
 * Demo screen: converts a fixed Chinese sample string to pinyin through the
 * native {@link Hz2py} bridge and shows the result in the layout's text view.
 */
public class MainActivity extends Activity {

	/** Converter instance; assigned in {@link #onCreate(Bundle)}. */
	protected Hz2py hz2py;

	/**
	 * Inflates the main layout, creates the converter and displays the
	 * converted sample text.
	 *
	 * @param savedInstanceState state handed over by the framework, forwarded
	 *        unchanged to the superclass
	 */
	@Override
	protected void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		setContentView(R.layout.activity_main);

		hz2py = new Hz2py();

		final TextView output = (TextView) findViewById(R.id.text);
		final String pinyin = hz2py.hz2py("汉字转拼音");
		output.setText(pinyin);
	}

	/**
	 * Populates the options menu from the {@code R.menu.main} resource.
	 *
	 * @param menu the menu to fill
	 * @return always {@code true}
	 */
	@Override
	public boolean onCreateOptionsMenu(Menu menu) {
		// Adds the entries to the action bar when one is present.
		getMenuInflater().inflate(R.menu.main, menu);
		return true;
	}

}

  

 

JNI 类:Hz2py

 

package com.tool.hz2py;

/**
 * Java-side binding for the native hanzi-to-pinyin converter.
 *
 * <p>Loading this class loads the {@code Hz2py} native library.
 */
public class Hz2py {

	/** Name passed to {@link System#loadLibrary(String)}. */
	private static final String NATIVE_LIBRARY = "Hz2py";

	static {
		System.loadLibrary(NATIVE_LIBRARY);
	}

	/**
	 * Converts the given text using the native implementation.
	 *
	 * @param text the text to convert
	 * @return the string produced by the native code
	 */
	public native String hz2py(String text);

}

  

 

下面是C++头文件和代码

 

/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_tool_hz2py_Hz2py */

/* Include guard so the declaration below is only seen once per translation unit. */
#ifndef _Included_com_tool_hz2py_Hz2py
#define _Included_com_tool_hz2py_Hz2py
/* When compiled as C++, declare the function with C linkage. */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class:     com_tool_hz2py_Hz2py
 * Method:    hz2py
 * Signature: (Ljava/lang/String;)Ljava/lang/String;
 *
 * Native entry point for the hz2py method of the class named above:
 * takes the JNI environment, the receiver object and one Java string,
 * and returns a Java string.
 */
JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py
  (JNIEnv *, jobject, jstring);

#ifdef __cplusplus
}
#endif
#endif

  

 

#include "hz2py.h"
#include <string.h>
#include "com_tool_hz2py_Hz2py.h"

/* Maximum number of leading bytes that is_utf8_string() inspects. */
#define HZ2PY_UTF8_CHECK_LENGTH 20
/* Not referenced by the code visible in this file. */
#define HZ2PY_FILE_READ_BUF_ARRAY_SIZE 1000
/* Size in bytes of the local input copy made by hztpy(). */
#define HZ2PY_INPUT_BUF_ARRAY_SIZE 1024
/* Not referenced by the code visible in this file. */
#define HZ2PY_OUTPUT_BUF_ARRAY_SIZE 2048
#define HZ2PY_STR_COPY(to, from, count)     ok = 1;    i = 0;    _tmp = from;    while(i < count)    {        if (*_tmp == ‘\0‘)        {            ok = 0;            break;        }        _tmp ++;        i ++;    }    if (ok){    i = 0;    while(i < count)    {        *to = *from;        to ++;        from ++;        i ++;    }}else{    if (overage_buff != NULL)    {        while(*from != ‘\0‘)        {            *overage_buff = *from;            from ++;        }    }    break;}

//将utf8编码的字符串中的汉字解成拼音
// in 输入
// out 输出
// first_letter_only 是否只输出拼音首字母
// polyphone_support 是否输出多音字
// add_blank 是否在拼音之间追加空格
// convert_double_char 是否转换全角字符为半角字符
// overage_buff 末尾如果有多余的不能组成完整utf8字符的字节,将写到overage_buff,传NULL将输出到out

void utf8_to_pinyin(char *in, char *out, int first_letter_only,
		int polyphone_support, int add_blank, int convert_double_char,
		char *overage_buff) {
	int i = 0;
	char *utf = in;
	char *_tmp;
	char *_tmp2;
	char py_tmp[30] = "";
	char py_tmp2[30] = "";
	char *out_start_flag = out;
	int uni;
	int ok = 0;
	while (*utf != ‘\0‘) {
		if ((*utf >> 7) == 0) {
			HZ2PY_STR_COPY(out, utf, 1);
			//如果为一个字节加上#号分隔
			*out = ‘#‘; //用#号做为分隔符
			out++;
			//去掉其它的英文只留汉字
			//只能搜索到汉字拼音里面字母
			//			out--;
			//			*out = ‘ ‘;
		}
		//两个字节
		else if ((*utf & 0xE0) == 0xC0) {
			HZ2PY_STR_COPY(out, utf, 2);
		}
		//三个字节
		else if ((*utf & 0xF0) == 0xE0) {
			if (*(utf + 1) != ‘\0‘ && *(utf + 2) != ‘\0‘) {
				uni = (((int) (*utf & 0x0F)) << 12)
						| (((int) (*(utf + 1) & 0x3F)) << 6)
						| (*(utf + 2) & 0x3F);

				if (uni > 19967 && uni < 40870) {
					memset(py_tmp, ‘\0‘, 30);
					memset(py_tmp2, ‘\0‘, 30);
					strcpy(py_tmp, _pinyin_table_[uni - 19968]);
					_tmp = py_tmp;
					_tmp2 = py_tmp2;

					if (first_letter_only == 1) {
						*_tmp2 = *_tmp;
						_tmp++;
						_tmp2++;
						while (*_tmp != ‘\0‘) {
							if (*_tmp == ‘|‘ || *(_tmp - 1) == ‘|‘) {
								*_tmp2 = *_tmp;
								_tmp2++;
							}
							_tmp++;
						}
					} else {
						strcpy(py_tmp2, py_tmp);
					}

					_tmp2 = py_tmp2;

					if (polyphone_support == 0) {
						while (*_tmp2 != ‘\0‘) {
							if (*_tmp2 == ‘|‘) {
								*_tmp2 = ‘\0‘;
								break;
							}
							_tmp2++;
						}
						_tmp2 = py_tmp2;
					}
					strcpy(out, _tmp2);
					out += strlen(_tmp2);
					if (add_blank) {
						*out = ‘#‘; //用#号做为分隔符
						out++;
					}
					utf += 3;
				} else if (convert_double_char && uni > 65280 && uni < 65375) {
					*out = uni - 65248;
					out++;
					utf += 3;
				} else if (convert_double_char && uni == 12288) {
					*out = 32;
					out++;
					utf += 3;
				} else {
					HZ2PY_STR_COPY(out, utf, 3);
				}
			} else {
				HZ2PY_STR_COPY(out, utf, 3);
			}
		}
		//四个字节
		else if ((*utf & 0xF8) == 0xF0) {
			HZ2PY_STR_COPY(out, utf, 4);
		}
		//五个字节
		else if ((*utf & 0xFC) == 0xF8) {
			HZ2PY_STR_COPY(out, utf, 5);
		}
		//六个字节
		else if ((*utf & 0xFE) == 0xFC) {
			HZ2PY_STR_COPY(out, utf, 6);
		} else {
			if (overage_buff != NULL) {
				*overage_buff = *utf;
				overage_buff++;
			} else {
				HZ2PY_STR_COPY(out, utf, 1);
			}
			break;
		}
	}
}

/*
 * Checks whether a string looks like UTF-8.
 *
 * Only the first HZ2PY_UTF8_CHECK_LENGTH bytes (or fewer, if the string is
 * shorter) are examined. A multi-byte sequence that is still incomplete when
 * the examined window ends does not count as an error.
 *
 * Returns 1 if no violation was found, 0 otherwise.
 */
int is_utf8_string(char *utf) {
	int limit = strlen(utf);
	if (limit > HZ2PY_UTF8_CHECK_LENGTH) {
		limit = HZ2PY_UTF8_CHECK_LENGTH;
	}

	/* Continuation bytes still owed by the current multi-byte sequence. */
	int pending = 0;

	for (int pos = 0; pos < limit; ++pos) {
		const unsigned char byte = (unsigned char) utf[pos];

		if (pending > 0) {
			/* Inside a sequence: only 10xxxxxx is acceptable. */
			if ((byte & 0xC0) != 0x80) {
				return 0;
			}
			--pending;
			continue;
		}

		if (byte < 0x80) {
			/* Plain ASCII byte. */
			continue;
		}

		/* Lead byte: its prefix tells how many continuation bytes follow. */
		if ((byte & 0xE0) == 0xC0) {
			pending = 1;
		} else if ((byte & 0xF0) == 0xE0) {
			pending = 2;
		} else if ((byte & 0xF8) == 0xF0) {
			pending = 3;
		} else if ((byte & 0xFC) == 0xF8) {
			pending = 4;
		} else if ((byte & 0xFE) == 0xFC) {
			pending = 5;
		} else {
			return 0;
		}
	}
	return 1;
}

/*
 * Converts the Chinese characters in a UTF-8 string to pinyin.
 *
 * read_buff  NUL-terminated UTF-8 input; must be shorter than
 *            HZ2PY_INPUT_BUF_ARRAY_SIZE bytes.
 * outbuf     Output buffer supplied by the caller. Its size is not known
 *            here, so the caller must make it large enough for the result.
 *
 * Returns 1 on success. Returns -1, leaving outbuf untouched, when an
 * argument is NULL, the input does not fit the internal buffer, or the input
 * does not look like UTF-8.
 *
 * Fixed conversion settings: full pinyin (not just first letters), all
 * readings of polyphonic characters, a '#' separator after each converted
 * character, no full-width to half-width conversion.
 */
int hztpy(const char *read_buff, char *outbuf) {
	/* Receives an incomplete trailing UTF-8 sequence; the bytes are discarded
	 * because this function converts a single, complete string. */
	char overage_buff[7] = { 0 };
	/* utf8_to_pinyin() takes a non-const pointer, so work on a local copy. */
	char inbuf[HZ2PY_INPUT_BUF_ARRAY_SIZE] = { 0 };
	const int add_blank = 1;
	const int polyphone_support = 1;
	const int first_letter_only = 0;
	const int convert_double_char = 0;

	if (read_buff == NULL || outbuf == NULL) {
		return -1;
	}

	/* Reject input that would overflow inbuf instead of copying blindly. */
	const size_t in_len = strlen(read_buff);
	if (in_len >= sizeof(inbuf)) {
		return -1;
	}
	memcpy(inbuf, read_buff, in_len + 1);

	if (!is_utf8_string(inbuf)) {
		return -1;
	}
	utf8_to_pinyin(inbuf, outbuf, first_letter_only, polyphone_support,
			add_blank, convert_double_char, overage_buff);
	return 1;
}

/*
 * JNI entry point for com.tool.hz2py.Hz2py#hz2py(String).
 *
 * env   JNI environment of the calling thread.
 * thiz  The Hz2py instance the method was invoked on (unused).
 * text  The Java string to convert.
 *
 * Returns a new Java string holding the conversion result. Returns an empty
 * string when hztpy() rejects the input or the input is too long for its
 * internal buffer. Returns NULL when text is NULL or its characters cannot
 * be obtained.
 */
JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py(JNIEnv *env,
		jobject thiz, jstring text) {
	if (text == NULL) {
		return NULL;
	}

	const char *pText = env->GetStringUTFChars(text, 0);
	if (pText == NULL) {
		/* GetStringUTFChars failed. */
		return NULL;
	}

	jstring returnText = NULL;
	const size_t inLen = strlen(pText);

	if (inLen < HZ2PY_INPUT_BUF_ARRAY_SIZE) {
		/* Size the output from the input instead of using a fixed 512 bytes.
		 * One 3-byte Chinese character can expand to a table entry plus a
		 * '#' separator; utf8_to_pinyin() stages entries in 30-byte buffers,
		 * so 10 output bytes per input byte is assumed to be the worst case
		 * (confirm against the pinyin table). ASCII bytes expand to 2. */
		size_t outSize = inLen * 10 + 1;
		if (outSize < 512) {
			outSize = 512;
		}
		/* The trailing () zero-initialises the buffer, so a rejected input
		 * yields an empty string. */
		char *oText = new char[outSize]();
		hztpy(pText, oText);
		returnText = env->NewStringUTF(oText);
		/* Allocated with new[], so it must be released with delete[]. */
		delete[] oText;
	} else {
		/* Too long for hztpy()'s input buffer. */
		returnText = env->NewStringUTF("");
	}

	env->ReleaseStringUTFChars(text, pText);
	return returnText;
}

  

 

hz2py.h 头文件(上面 C++ 代码里 #include 的那个)有点大,我直接上传给大家下载好了。
hz2py
下面是直接编译好的 .so 共享库和 Java 文件。注意:包名只能用 com.tool.hz2py,不能更改,因为 JNI 函数名 Java_com_tool_hz2py_Hz2py_hz2py 是按包名和类名生成的,改了包名就找不到对应的本地方法。
libHz2py

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。