summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David A. Madore <david@procyon.(none)> 2010-04-27 20:55:58 +0200
committer David A. Madore <david@procyon.(none)> 2010-04-27 20:55:58 +0200
commit 7108dafb73f4c06261a56b819f4530ffd17bf6e7 (patch)
tree 22bf852a5e4dcc7c511ee6422fe253bd8dbc0cfc
parent 9df4bb4fa30a0c05ccd484d6b59521bfca665854 (diff)
download UnicodeMap-7108dafb73f4c06261a56b819f4530ffd17bf6e7.tar.gz
UnicodeMap-7108dafb73f4c06261a56b819f4530ffd17bf6e7.tar.bz2
UnicodeMap-7108dafb73f4c06261a56b819f4530ffd17bf6e7.zip
Preliminary implementation of CJK and Hangul.
-rw-r--r-- src/org/madore/android/unicodeMap/UnicodeCharacter.java 58
-rw-r--r-- src/org/madore/android/unicodeMap/UnicodeDatabase.java 31
2 files changed, 87 insertions, 2 deletions
diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java
index 552b43c..166229f 100644
--- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java
+++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java
@@ -225,6 +225,64 @@ public class UnicodeCharacter implements UnicodeDisplayable {
public String toString() { return this.descr; }
}
+ public static enum SpecialRange { // Code-point ranges whose character names are computed by the static helpers of this enum.
+ CJK_IDEOGRAPH_EXTENSION_A(0x3400, 0x4DB5),
+ CJK_IDEOGRAPH(0x4E00, 0x9FCB),
+ HANGUL_SYLLABLE(0xAC00, 0xD7A3),
+ CJK_IDEOGRAPH_EXTENSION_B(0x20000, 0x2A6D6),
+ CJK_IDEOGRAPH_EXTENSION_C(0x2A700, 0x2B734); // each constant is given its first and last code point, both inclusive
+ protected final int from; protected final int to; // from: first code point of the range; to: one past the last code point
+ SpecialRange(int from, int last) { // takes inclusive bounds and stores them as a half-open interval
+ this.from = from;
+ this.to = last+1; // convert the inclusive upper bound to an exclusive one
+ }
+ public int getFrom() { return this.from; } // inclusive lower bound
+ public int getTo() { return this.to; } // exclusive upper bound
+ public boolean belongs(int codePoint) { // true when getFrom() <= codePoint < getTo()
+ return ( codePoint>=this.from && codePoint<this.to );
+ }
+ public int interCount(int from, int to) { // number of code points common to this range and the half-open interval from..to (to excluded)
+ int from0 = (from<this.from)?this.from:from; // raise the lower bound to the start of this range if needed
+ int to0 = (to>this.to)?this.to:to; // lower the upper bound to the end of this range if needed
+ return (to0-from0>0) ? to0-from0 : 0; // disjoint or empty intersection counts as 0; NOTE(review): the subtraction may wrap for extreme int arguments - confirm callers only pass code points
+ }
+ public boolean inside(int from, int to) { // true when from is not below this range's start and to is not above its exclusive end; NOTE(review): from <= to is not checked, so a reversed interval can also pass - confirm callers
+ return ( from >= this.from && to <= this.to );
+ }
+ public static boolean isCjkUnifiedIdeograph(int codePoint) { // true when codePoint belongs to any of the four CJK ideograph ranges
+ return ( CJK_IDEOGRAPH.belongs(codePoint)
+ || CJK_IDEOGRAPH_EXTENSION_A.belongs(codePoint)
+ || CJK_IDEOGRAPH_EXTENSION_B.belongs(codePoint)
+ || CJK_IDEOGRAPH_EXTENSION_C.belongs(codePoint) );
+ }
+ public static boolean isHangulSyllable(int codePoint) { // true when codePoint belongs to HANGUL_SYLLABLE
+ return HANGUL_SYLLABLE.belongs(codePoint);
+ }
+ public static String cjkUnifiedIdeographName(int codePoint) { // returns the computed name of a CJK ideograph, or null when codePoint is not one
+ if ( ! isCjkUnifiedIdeograph(codePoint) )
+ return null;
+ return String.format("CJK UNIFIED IDEOGRAPH-%04X", codePoint); // fixed prefix followed by the code point in upper-case hex, zero-padded to at least 4 digits
+ }
+ public static String hangulSyllableName(int codePoint) { // returns the computed name of a Hangul syllable, or null when codePoint is not one
+ if ( ! isHangulSyllable(codePoint) )
+ return null;
+ int index = codePoint - HANGUL_SYLLABLE.getFrom(); // zero-based offset of the syllable inside the range
+ final int tCount = 28; final int nCount = 21*tCount; // 28 values of t for each (l, v) pair; 21*28 syllables for each value of l
+ int l = index/nCount; // index into partL
+ int v = (index%nCount)/tCount; // index into partV
+ int t = index%tCount; // index into partT
+ final String[] partL = { "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", // first name fragment, selected by l (presumably the leading consonant - confirm against the Unicode Hangul naming rule)
+ "SS", "", "J", "JJ", "C", "K", "T", "P", "H" };
+ final String[] partV = { "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", // second name fragment, selected by v (presumably the vowel)
+ "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
+ "I" };
+ final String[] partT = { "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", // third name fragment, selected by t (presumably the trailing consonant; index 0 is empty)
+ "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S",
+ "SS", "NG", "J", "C", "K", "T", "P", "H" };
+ return String.format("HANGUL SYLLABLE %s%s%s", partL[l], partV[v], partT[t]); // fixed prefix followed by the three fragments concatenated
+ }
+ }
+
public static enum Category {
UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER),
LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER),
diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java
index 4114e6e..df91010 100644
--- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java
+++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java
@@ -1,6 +1,8 @@
package org.madore.android.unicodeMap;
import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -143,13 +145,27 @@ public class UnicodeDatabase {
protected final static String[] queryColumns = { "id", "name", "category" };
+ public static UnicodeCharacter getSingleSpecial(int codePoint) { // builds a character with a computed name for CJK ideographs and Hangul syllables; returns null for any other code point
+ if ( UnicodeCharacter.SpecialRange.isCjkUnifiedIdeograph(codePoint) ) // CJK ideograph: name comes from cjkUnifiedIdeographName
+ return new UnicodeCharacter(codePoint,
+ UnicodeCharacter.SpecialRange.cjkUnifiedIdeographName(codePoint),
+ UnicodeCharacter.Category.OTHER_LETTER);
+ if ( UnicodeCharacter.SpecialRange.isHangulSyllable(codePoint) ) // Hangul syllable: name comes from hangulSyllableName
+ return new UnicodeCharacter(codePoint,
+ UnicodeCharacter.SpecialRange.hangulSyllableName(codePoint),
+ UnicodeCharacter.Category.OTHER_LETTER);
+ return null; // codePoint is in none of the special ranges
+ }
+
public UnicodeCharacter getSingle(int codePoint) {
+ UnicodeCharacter ch = getSingleSpecial(codePoint);
+ if ( ch != null )
+ return ch;
final Cursor c
= db.query(UNICODE_TABLE_NAME, queryColumns,
"id=?",
new String[] { Integer.toString(codePoint) },
null, null, null, null);
- UnicodeCharacter ch;
if ( c.getCount() > 0 ) {
c.moveToFirst();
ch = new UnicodeCharacter(c.getInt(0),
@@ -162,6 +178,9 @@ public class UnicodeDatabase {
}
public int countRange(int from, int to) {
+ int extra = 0;
+ for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() )
+ extra += rng.interCount(from, to);
final Cursor c
= db.rawQuery("SELECT count(*) FROM "+UNICODE_TABLE_NAME
+" WHERE id >= ? AND id < ?",
@@ -172,7 +191,7 @@ public class UnicodeDatabase {
c.moveToFirst();
int cnt = c.getInt(0);
c.close();
- return cnt;
+ return cnt + extra;
}
private static class CursorIterable
@@ -212,6 +231,14 @@ public class UnicodeDatabase {
}
public Iterable<UnicodeCharacter> getRange(int from, int to) {
+ for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() ) {
+ if ( rng.inside(from, to) ) {
+ List<UnicodeCharacter> list = new ArrayList<UnicodeCharacter>(to-from);
+ for ( int i=from ; i<to ; i++ )
+ list.add(getSingleSpecial(i));
+ return list;
+ }
+ }
final Cursor c
= db.query(UNICODE_TABLE_NAME, queryColumns,
"id >= ? AND id < ?",