From 7108dafb73f4c06261a56b819f4530ffd17bf6e7 Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Tue, 27 Apr 2010 20:55:58 +0200 Subject: Preliminary implementation of CJK and Hangul. --- .../android/unicodeMap/UnicodeCharacter.java | 58 ++++++++++++++++++++++ .../madore/android/unicodeMap/UnicodeDatabase.java | 31 +++++++++++- 2 files changed, 87 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java index 552b43c..166229f 100644 --- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java +++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java @@ -225,6 +225,64 @@ public class UnicodeCharacter implements UnicodeDisplayable { public String toString() { return this.descr; } } + public static enum SpecialRange { + CJK_IDEOGRAPH_EXTENSION_A(0x3400, 0x4DB5), + CJK_IDEOGRAPH(0x4E00, 0x9FCB), + HANGUL_SYLLABLE(0xAC00, 0xD7A3), + CJK_IDEOGRAPH_EXTENSION_B(0x20000, 0x2A6D6), + CJK_IDEOGRAPH_EXTENSION_C(0x2A700, 0x2B734); + protected final int from; protected final int to; + SpecialRange(int from, int last) { + this.from = from; + this.to = last+1; + } + public int getFrom() { return this.from; } + public int getTo() { return this.to; } + public boolean belongs(int codePoint) { + return ( codePoint>=this.from && codePoint<this.to ); + } + public int interCount(int from, int to) { + int from0 = (from<this.from)?this.from:from; + int to0 = (to>this.to)?this.to:to; + return (to0-from0>0) ? to0-from0 : 0; + } + public boolean inside(int from, int to) { + return ( from >= this.from && to <= this.to ); + } + public static boolean isCjkUnifiedIdeograph(int codePoint) { + return ( CJK_IDEOGRAPH.belongs(codePoint) + || CJK_IDEOGRAPH_EXTENSION_A.belongs(codePoint) + || CJK_IDEOGRAPH_EXTENSION_B.belongs(codePoint) + || CJK_IDEOGRAPH_EXTENSION_C.belongs(codePoint) ); + } + public static boolean isHangulSyllable(int codePoint) { + return HANGUL_SYLLABLE.belongs(codePoint); + } + public static String cjkUnifiedIdeographName(int codePoint) { + if ( ! 
isCjkUnifiedIdeograph(codePoint) ) + return null; + return String.format("CJK UNIFIED IDEOGRAPH-%04X", codePoint); + } + public static String hangulSyllableName(int codePoint) { + if ( ! isHangulSyllable(codePoint) ) + return null; + int index = codePoint - HANGUL_SYLLABLE.getFrom(); + final int tCount = 28; final int nCount = 21*tCount; + int l = index/nCount; + int v = (index%nCount)/tCount; + int t = index%tCount; + final String[] partL = { "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", + "SS", "", "J", "JJ", "C", "K", "T", "P", "H" }; + final String[] partV = { "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", + "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI", + "I" }; + final String[] partT = { "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", + "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S", + "SS", "NG", "J", "C", "K", "T", "P", "H" }; + return String.format("HANGUL SYLLABLE %s%s%s", partL[l], partV[v], partT[t]); + } + } + public static enum Category { UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER), LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER), diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java index 4114e6e..df91010 100644 --- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java +++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java @@ -1,6 +1,8 @@ package org.madore.android.unicodeMap; import java.util.Iterator; +import java.util.List; +import java.util.ArrayList; import java.util.NoSuchElementException; import java.io.InputStream; import java.io.InputStreamReader; @@ -143,13 +145,27 @@ public class UnicodeDatabase { protected final static String[] queryColumns = { "id", "name", "category" }; + public static UnicodeCharacter getSingleSpecial(int codePoint) { + if ( UnicodeCharacter.SpecialRange.isCjkUnifiedIdeograph(codePoint) ) + return new UnicodeCharacter(codePoint, + 
UnicodeCharacter.SpecialRange.cjkUnifiedIdeographName(codePoint), + UnicodeCharacter.Category.OTHER_LETTER); + if ( UnicodeCharacter.SpecialRange.isHangulSyllable(codePoint) ) + return new UnicodeCharacter(codePoint, + UnicodeCharacter.SpecialRange.hangulSyllableName(codePoint), + UnicodeCharacter.Category.OTHER_LETTER); + return null; + } + public UnicodeCharacter getSingle(int codePoint) { + UnicodeCharacter ch = getSingleSpecial(codePoint); + if ( ch != null ) + return ch; final Cursor c = db.query(UNICODE_TABLE_NAME, queryColumns, "id=?", new String[] { Integer.toString(codePoint) }, null, null, null, null); - UnicodeCharacter ch; if ( c.getCount() > 0 ) { c.moveToFirst(); ch = new UnicodeCharacter(c.getInt(0), @@ -162,6 +178,9 @@ public class UnicodeDatabase { } public int countRange(int from, int to) { + int extra = 0; + for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() ) + extra += rng.interCount(from, to); final Cursor c = db.rawQuery("SELECT count(*) FROM "+UNICODE_TABLE_NAME +" WHERE id >= ? AND id < ?", @@ -172,7 +191,7 @@ public class UnicodeDatabase { c.moveToFirst(); int cnt = c.getInt(0); c.close(); - return cnt; + return cnt + extra; } private static class CursorIterable @@ -212,6 +231,14 @@ public class UnicodeDatabase { } public Iterable<UnicodeCharacter> getRange(int from, int to) { + for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() ) { + if ( rng.inside(from, to) ) { + List<UnicodeCharacter> list = new ArrayList<UnicodeCharacter>(to-from); + for ( int i=from ; i<to ; i++ ) + list.add(getSingleSpecial(i)); + return list; + } + } final Cursor c = db.query(UNICODE_TABLE_NAME, queryColumns, "id >= ? AND id < ?", -- cgit v1.2.3