Android 汉字转拼音之JNI篇
package com.tool.hz2py; import android.os.Bundle; import android.app.Activity; import android.view.Menu; import android.widget.TextView; public class MainActivity extends Activity { protected Hz2py hz2py; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); hz2py = new Hz2py(); TextView view = (TextView) findViewById(R.id.text); view.setText(hz2py.hz2py("汉字转拼音")); } @Override public boolean onCreateOptionsMenu(Menu menu) { // Inflate the menu; this adds items to the action bar if it is present. getMenuInflater().inflate(R.menu.main, menu); return true; } }
JNI 类:Hz2py
package com.tool.hz2py;

/**
 * Java-side declaration of the native Hanzi-to-pinyin converter.
 *
 * <p>Loading this class loads the native library named {@code "Hz2py"}.
 */
public class Hz2py {

    static {
        // Must run before the native method below is first invoked.
        System.loadLibrary("Hz2py");
    }

    /**
     * Converts the given text with the native implementation.
     *
     * @param text the text to convert
     * @return the string produced by the native library
     */
    public native String hz2py(String text);
}
下面是C++头文件和代码
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_tool_hz2py_Hz2py */

#ifndef _Included_com_tool_hz2py_Hz2py
#define _Included_com_tool_hz2py_Hz2py

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Native entry point bound to the Java method below.
 *
 * Class:     com_tool_hz2py_Hz2py
 * Method:    hz2py
 * Signature: (Ljava/lang/String;)Ljava/lang/String;
 */
JNIEXPORT jstring JNICALL
Java_com_tool_hz2py_Hz2py_hz2py(JNIEnv *env, jobject thiz, jstring text);

#ifdef __cplusplus
}
#endif

#endif
#include "hz2py.h"
#include <string.h>
#include "com_tool_hz2py_Hz2py.h"

#define HZ2PY_UTF8_CHECK_LENGTH 20
#define HZ2PY_FILE_READ_BUF_ARRAY_SIZE 1000
#define HZ2PY_INPUT_BUF_ARRAY_SIZE 1024
#define HZ2PY_OUTPUT_BUF_ARRAY_SIZE 2048

/* Size of the scratch buffers holding one pinyin table entry, NUL included. */
#define HZ2PY_PINYIN_BUF_SIZE 30

/*
 * Worst-case output growth per input byte: a 3-byte Hanzi can expand to a
 * (HZ2PY_PINYIN_BUF_SIZE - 1)-byte pinyin entry plus one '#' separator.
 * (An ASCII byte expands to 2 bytes, everything else to at most 1:1.)
 */
#define HZ2PY_MAX_OUTPUT_BYTES_PER_INPUT_BYTE (HZ2PY_PINYIN_BUF_SIZE / 3)

/*
 * Copies one `count`-byte sequence from `from` to `to`, advancing both.
 *
 * If the input ends before `count` bytes are available, the incomplete tail
 * is written to `overage` when it is non-NULL, otherwise to `to`, and false
 * is returned so the caller stops converting.
 */
static bool hz2py_copy_sequence(char *&to, char *&from, int count,
        char *&overage)
{
    for (int i = 0; i < count; i++) {
        if (from[i] == '\0') {
            char *&tail_dst = (overage != NULL) ? overage : to;
            while (*from != '\0') {
                *tail_dst++ = *from++;
            }
            return false;
        }
    }

    for (int i = 0; i < count; i++) {
        *to++ = *from++;
    }
    return true;
}

/*
 * Writes the pinyin for the code point `uni` (which must lie inside the
 * table range checked by the caller) to `out` without a terminating NUL and
 * returns the number of bytes written.
 */
static size_t hz2py_emit_pinyin(int uni, int first_letter_only,
        int polyphone_support, char *out)
{
    char entry[HZ2PY_PINYIN_BUF_SIZE];
    char result[HZ2PY_PINYIN_BUF_SIZE];

    memset(entry, '\0', sizeof(entry));
    memset(result, '\0', sizeof(result));

    /* Bounded copy: a longer table entry is truncated instead of overflowing. */
    strncpy(entry, _pinyin_table_[uni - 19968], sizeof(entry) - 1);

    if (first_letter_only == 1) {
        /* Keep the first letter of every reading plus the '|' separators. */
        char *dst = result;
        for (const char *src = entry; *src != '\0'; src++) {
            if (src == entry || *src == '|' || *(src - 1) == '|') {
                *dst++ = *src;
            }
        }
    } else {
        memcpy(result, entry, sizeof(result));
    }

    if (polyphone_support == 0) {
        /* Keep only the first reading. */
        char *bar = strchr(result, '|');
        if (bar != NULL) {
            *bar = '\0';
        }
    }

    const size_t len = strlen(result);
    memcpy(out, result, len);
    return len;
}

/*
 * Converts the Hanzi in a UTF-8 string to pinyin.
 *
 * in                   NUL-terminated input (not modified).
 * out                  output buffer; the result is NUL-terminated. The
 *                      caller must provide at least
 *                      strlen(in) * HZ2PY_MAX_OUTPUT_BYTES_PER_INPUT_BYTE + 1
 *                      bytes.
 * first_letter_only    1: output only the first letter of each reading.
 * polyphone_support    0: output only the first reading of a polyphonic
 *                      character; otherwise all readings, separated by '|'.
 * add_blank            non-zero: append the '#' separator after each pinyin.
 * convert_double_char  non-zero: convert full-width characters (and the
 *                      ideographic space) to their half-width form.
 * overage_buff         receives trailing bytes that do not form a complete
 *                      UTF-8 sequence (no NUL is appended, so pass a
 *                      zero-filled buffer of at least 7 bytes); pass NULL to
 *                      have those bytes written to out instead.
 *
 * Every ASCII byte is copied and followed by '#', regardless of add_blank.
 * Conversion stops at the first incomplete sequence or invalid lead byte.
 */
void utf8_to_pinyin(char *in, char *out, int first_letter_only,
        int polyphone_support, int add_blank, int convert_double_char,
        char *overage_buff)
{
    char *utf = in;

    while (*utf != '\0') {
        const unsigned char lead = (unsigned char) *utf;

        /* One byte (ASCII): copy it and append the '#' separator. */
        if ((lead & 0x80) == 0) {
            *out++ = *utf++;
            *out++ = '#';
            continue;
        }

        /* Sequence length announced by the lead byte; 0 means invalid. */
        int seq_len = 0;
        if ((lead & 0xE0) == 0xC0) {
            seq_len = 2;
        } else if ((lead & 0xF0) == 0xE0) {
            seq_len = 3;
        } else if ((lead & 0xF8) == 0xF0) {
            seq_len = 4;
        } else if ((lead & 0xFC) == 0xF8) {
            seq_len = 5;
        } else if ((lead & 0xFE) == 0xFC) {
            seq_len = 6;
        }

        if (seq_len == 0) {
            /* Not a valid lead byte: divert it and stop. */
            if (overage_buff != NULL) {
                *overage_buff++ = *utf;
            } else {
                *out++ = *utf;
            }
            break;
        }

        /* Only complete three-byte sequences can be converted. */
        if (seq_len == 3 && utf[1] != '\0' && utf[2] != '\0') {
            const int uni = ((int) (lead & 0x0F) << 12)
                    | ((int) (utf[1] & 0x3F) << 6)
                    | (utf[2] & 0x3F);

            if (uni > 19967 && uni < 40870) {
                /* Code point covered by the pinyin table. */
                out += hz2py_emit_pinyin(uni, first_letter_only,
                        polyphone_support, out);
                if (add_blank) {
                    *out++ = '#';
                }
                utf += 3;
                continue;
            }
            if (convert_double_char && uni > 65280 && uni < 65375) {
                /* Full-width form -> half-width ASCII counterpart. */
                *out++ = (char) (uni - 65248);
                utf += 3;
                continue;
            }
            if (convert_double_char && uni == 12288) {
                /* Ideographic space -> ASCII space. */
                *out++ = 32;
                utf += 3;
                continue;
            }
        }

        /* Everything else is passed through unchanged. */
        if (!hz2py_copy_sequence(out, utf, seq_len, overage_buff)) {
            break;
        }
    }

    *out = '\0';
}

/*
 * Heuristically checks whether a string is UTF-8 encoded.
 *
 * Only the first HZ2PY_UTF8_CHECK_LENGTH bytes are inspected; a multi-byte
 * sequence cut off by the end of that window is accepted.
 * Returns 1 if the inspected bytes are consistent with UTF-8, 0 otherwise.
 */
int is_utf8_string(char *utf)
{
    size_t length = strlen(utf);
    if (length > HZ2PY_UTF8_CHECK_LENGTH) {
        length = HZ2PY_UTF8_CHECK_LENGTH;
    }

    int pending = 0; /* continuation bytes still expected */
    for (size_t i = 0; i < length; i++) {
        const unsigned char byte = (unsigned char) utf[i];

        if (pending > 0) {
            if ((byte & 0xC0) != 0x80) {
                return 0;
            }
            pending--;
            continue;
        }

        if ((byte & 0x80) == 0) {
            continue;
        } else if ((byte & 0xE0) == 0xC0) {
            pending = 1;
        } else if ((byte & 0xF0) == 0xE0) {
            pending = 2;
        } else if ((byte & 0xF8) == 0xF0) {
            pending = 3;
        } else if ((byte & 0xFC) == 0xF8) {
            pending = 4;
        } else if ((byte & 0xFE) == 0xFC) {
            pending = 5;
        } else {
            return 0;
        }
    }
    return 1;
}

/*
 * Converts read_buff to pinyin with the fixed options below and writes the
 * NUL-terminated result to outbuf.
 *
 * outbuf must hold at least
 * strlen(read_buff) * HZ2PY_MAX_OUTPUT_BYTES_PER_INPUT_BYTE + 1 bytes.
 *
 * Returns 1 on success, -1 if an argument is NULL or the input does not look
 * like UTF-8 (outbuf is left untouched in that case).
 */
int hztpy(const char *read_buff, char *outbuf)
{
    /* Receives an incomplete trailing sequence; its content is discarded. */
    char overage_buff[7] = { 0 };

    const int add_blank = 1;           /* append '#' after every pinyin */
    const int polyphone_support = 1;   /* output all readings */
    const int first_letter_only = 0;   /* output full pinyin */
    const int convert_double_char = 0; /* leave full-width characters alone */

    if (read_buff == NULL || outbuf == NULL) {
        return -1;
    }

    /*
     * The helpers take a non-const pointer but never write through it, so
     * the input is used in place rather than copied into a fixed-size stack
     * buffer that longer input would overflow.
     */
    char *input = const_cast<char *>(read_buff);

    if (!is_utf8_string(input)) {
        return -1;
    }

    utf8_to_pinyin(input, outbuf, first_letter_only, polyphone_support,
            add_blank, convert_double_char, overage_buff);
    return 1;
}

/*
 * JNI entry point for com.tool.hz2py.Hz2py#hz2py(String).
 *
 * Returns the converted string, an empty string when the input is not
 * recognised as UTF-8, or NULL when text is NULL or its characters cannot
 * be obtained.
 */
JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py(JNIEnv *env,
        jobject thiz, jstring text)
{
    if (text == NULL) {
        return NULL;
    }

    const char *pText = env->GetStringUTFChars(text, NULL);
    if (pText == NULL) {
        /* GetStringUTFChars failed; let the caller see the failure. */
        return NULL;
    }

    /* Size the output for the worst case instead of a fixed 512 bytes. */
    const size_t capacity =
            strlen(pText) * HZ2PY_MAX_OUTPUT_BYTES_PER_INPUT_BYTE + 1;
    char *oText = new char[capacity];
    memset(oText, 0, capacity);

    hztpy(pText, oText);
    env->ReleaseStringUTFChars(text, pText);

    jstring returnText = env->NewStringUTF(oText);
    delete[] oText; /* array form: the buffer came from new[] */
    return returnText;
}
头文件有点大,我直接上传给大家下载好了。
hz2py
直接编译好的 .so 共享库和 Java 文件。注意:包名必须保持为 com.tool.hz2py,不能更改,因为 JNI 导出函数名 Java_com_tool_hz2py_Hz2py_hz2py 中包含了包名和类名,改动后将找不到对应的 native 方法。
libHz2py
- 本文固定链接: http://www.ithtw.com/37.html
- 转载请注明: leehom 2014年08月05日 于 IT十万为什么 发表
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。