From 7108dafb73f4c06261a56b819f4530ffd17bf6e7 Mon Sep 17 00:00:00 2001
From: "David A. Madore" <david@procyon.(none)>
Date: Tue, 27 Apr 2010 20:55:58 +0200
Subject: Preliminary implementation of CJK and Hangul.

---
 .../android/unicodeMap/UnicodeCharacter.java       | 58 ++++++++++++++++++++++
 .../madore/android/unicodeMap/UnicodeDatabase.java | 31 +++++++++++-
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java
index 552b43c..166229f 100644
--- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java
+++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java
@@ -225,6 +225,64 @@ public class UnicodeCharacter implements UnicodeDisplayable {
 	public String toString() { return this.descr; }
     }
 
+    /** Code point ranges (CJK ideographs, Hangul syllables) whose names are computed by the static helpers below. */
+    public static enum SpecialRange {
+	CJK_IDEOGRAPH_EXTENSION_A(0x3400, 0x4DB5),
+	CJK_IDEOGRAPH(0x4E00, 0x9FCB),
+	HANGUL_SYLLABLE(0xAC00, 0xD7A3),
+	CJK_IDEOGRAPH_EXTENSION_B(0x20000, 0x2A6D6),
+	CJK_IDEOGRAPH_EXTENSION_C(0x2A700, 0x2B734);
+	// Name fragments for Hangul syllables, built once instead of on every hangulSyllableName() call.
+	private static final int T_COUNT = 28, N_COUNT = 21*T_COUNT;
+	private static final String[] PART_L = { "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S",
+						 "SS", "", "J", "JJ", "C", "K", "T", "P", "H" };
+	private static final String[] PART_V = { "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
+						 "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
+						 "I" };
+	private static final String[] PART_T = { "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG",
+						 "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S",
+						 "SS", "NG", "J", "C", "K", "T", "P", "H" };
+	protected final int from;  protected final int to;  // half-open range: from inclusive, to exclusive
+	SpecialRange(int from, int last) {
+	    this.from = from;  this.to = last+1;  // last is inclusive, to is exclusive
+	}
+	public int getFrom() { return this.from; }  // first code point of the range
+	public int getTo() { return this.to; }  // one past the last code point
+	/** True if codePoint lies within this range. */
+	public boolean belongs(int codePoint) { return ( codePoint>=this.from && codePoint<this.to ); }
+	/** Number of code points common to this range and [from, to); 0 when they do not overlap. */
+	public int interCount(int from, int to) {
+	    final int lo = Math.max(from, this.from);
+	    final int hi = Math.min(to, this.to);
+	    return (hi > lo) ? hi-lo : 0;  // compare first: the difference can wrap for extreme arguments
+	}
+	/** True if [from, to) is entirely contained in this range. */
+	public boolean inside(int from, int to) { return ( from >= this.from && to <= this.to ); }
+	/** True if codePoint lies in any of the four CJK ideograph ranges of this enum. */
+	public static boolean isCjkUnifiedIdeograph(int codePoint) {
+	    return ( CJK_IDEOGRAPH.belongs(codePoint)
+		     || CJK_IDEOGRAPH_EXTENSION_A.belongs(codePoint)
+		     || CJK_IDEOGRAPH_EXTENSION_B.belongs(codePoint)
+		     || CJK_IDEOGRAPH_EXTENSION_C.belongs(codePoint) );
+	}
+	/** True if codePoint lies in the HANGUL_SYLLABLE range. */
+	public static boolean isHangulSyllable(int codePoint) { return HANGUL_SYLLABLE.belongs(codePoint); }
+	/** Returns "CJK UNIFIED IDEOGRAPH-" followed by the hex code point, or null if codePoint is not a CJK ideograph. */
+	public static String cjkUnifiedIdeographName(int codePoint) {
+	    if ( ! isCjkUnifiedIdeograph(codePoint) )
+		return null;
+	    return String.format("CJK UNIFIED IDEOGRAPH-%04X", codePoint);
+	}
+	/** Returns "HANGUL SYLLABLE " followed by the three name parts for codePoint, or null if it is not a Hangul syllable. */
+	public static String hangulSyllableName(int codePoint) {
+	    if ( ! isHangulSyllable(codePoint) )
+		return null;
+	    final int index = codePoint - HANGUL_SYLLABLE.getFrom();
+	    return String.format("HANGUL SYLLABLE %s%s%s", PART_L[index/N_COUNT],
+				 PART_V[(index%N_COUNT)/T_COUNT], PART_T[index%T_COUNT]);
+	}
+    }
+
     public static enum Category {
 	UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER),
 	LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER),
diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java
index 4114e6e..df91010 100644
--- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java
+++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java
@@ -1,6 +1,8 @@
 package org.madore.android.unicodeMap;
 
 import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
 import java.util.NoSuchElementException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -143,13 +145,27 @@ public class UnicodeDatabase {
 
     protected final static String[] queryColumns = { "id", "name", "category" };
 
+    /** Builds the character for a CJK ideograph or Hangul syllable from its computed name; null for any other code point. */
+    public static UnicodeCharacter getSingleSpecial(int codePoint) {
+	final String name;
+	if ( UnicodeCharacter.SpecialRange.isCjkUnifiedIdeograph(codePoint) )
+	    name = UnicodeCharacter.SpecialRange.cjkUnifiedIdeographName(codePoint);
+	else if ( UnicodeCharacter.SpecialRange.isHangulSyllable(codePoint) )
+	    name = UnicodeCharacter.SpecialRange.hangulSyllableName(codePoint);
+	else
+	    return null;
+	return new UnicodeCharacter(codePoint, name, UnicodeCharacter.Category.OTHER_LETTER);
+    }
+
     public UnicodeCharacter getSingle(int codePoint) {
+	UnicodeCharacter ch = getSingleSpecial(codePoint);
+	if ( ch != null )
+	    return ch;
 	final Cursor c
 	    = db.query(UNICODE_TABLE_NAME, queryColumns,
 		       "id=?",
 		       new String[] { Integer.toString(codePoint) },
 		       null, null, null, null);
-	UnicodeCharacter ch;
 	if ( c.getCount() > 0 ) {
 	    c.moveToFirst();
 	    ch = new UnicodeCharacter(c.getInt(0),
@@ -162,6 +178,9 @@ public class UnicodeDatabase {
     }
 
     public int countRange(int from, int to) {
+	int extra = 0;
+	for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() )
+	    extra += rng.interCount(from, to);
 	final Cursor c
 	    = db.rawQuery("SELECT count(*) FROM "+UNICODE_TABLE_NAME
 			  +" WHERE id >= ? AND id < ?",
@@ -172,7 +191,7 @@ public class UnicodeDatabase {
 	c.moveToFirst();
 	int cnt = c.getInt(0);
 	c.close();
-	return cnt;
+	return cnt + extra;
     }
 
     private static class CursorIterable
@@ -212,6 +231,14 @@ public class UnicodeDatabase {
     }
 
     public Iterable<UnicodeCharacter> getRange(int from, int to) {
+	for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() ) {
+	    if ( rng.inside(from, to) ) {
+		List<UnicodeCharacter> list = new ArrayList<UnicodeCharacter>(to-from);
+		for ( int i=from ; i<to ; i++ )
+		    list.add(getSingleSpecial(i));
+		return list;
+	    }
+	}
 	final Cursor c
 	    = db.query(UNICODE_TABLE_NAME, queryColumns,
 		       "id >= ? AND id < ?",
-- 
cgit v1.2.3