From c01f6ce7f6c70b72ceef439996c146f7b4744f98 Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Thu, 29 Apr 2010 00:20:54 +0200 Subject: More proper handling of CJK and Hangul. --- .../android/unicodeMap/UnicodeCharacter.java | 71 +++++++++++------ .../madore/android/unicodeMap/UnicodeDatabase.java | 88 +++++++++++++++++----- .../android/unicodeMap/UnicodeMapActivity.java | 5 ++ 3 files changed, 125 insertions(+), 39 deletions(-) (limited to 'src/org/madore/android') diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java index 166229f..f3ff6d1 100644 --- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java +++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java @@ -226,18 +226,42 @@ public class UnicodeCharacter implements UnicodeDisplayable { } public static enum SpecialRange { - CJK_IDEOGRAPH_EXTENSION_A(0x3400, 0x4DB5), - CJK_IDEOGRAPH(0x4E00, 0x9FCB), - HANGUL_SYLLABLE(0xAC00, 0xD7A3), - CJK_IDEOGRAPH_EXTENSION_B(0x20000, 0x2A6D6), - CJK_IDEOGRAPH_EXTENSION_C(0x2A700, 0x2B734); + CJK_IDEOGRAPH_EXTENSION_A(0x3400, 0x4DB5, Category.OTHER_LETTER) { + public String getName(int codePoint) { + return cjkIdeographName(codePoint); + } + }, + CJK_IDEOGRAPH(0x4E00, 0x9FCB, Category.OTHER_LETTER) { + public String getName(int codePoint) { + return cjkIdeographName(codePoint); + } + }, + HANGUL_SYLLABLE(0xAC00, 0xD7A3, Category.OTHER_LETTER) { + public String getName(int codePoint) { + return hangulSyllableName(codePoint); + } + }, + CJK_IDEOGRAPH_EXTENSION_B(0x20000, 0x2A6D6, Category.OTHER_LETTER) { + public String getName(int codePoint) { + return cjkIdeographName(codePoint); + } + }, + CJK_IDEOGRAPH_EXTENSION_C(0x2A700, 0x2B734, Category.OTHER_LETTER) { + public String getName(int codePoint) { + return cjkIdeographName(codePoint); + } + }; protected final int from; protected final int to; - SpecialRange(int from, int last) { + protected Category category; + SpecialRange(int from, int last, Category category) { this.from = from; this.to = last+1; + this.category = category; } public int getFrom() { return this.from; } public int getTo() { return this.to; } + public Category getCategory() { return this.category; } + public String getName(int codePoint) { return null; } public boolean belongs(int codePoint) { return ( codePoint>=this.from && codePoint= this.from && to <= this.to ); } - public static boolean isCjkUnifiedIdeograph(int codePoint) { + public static boolean isCjkIdeograph(int codePoint) { return ( CJK_IDEOGRAPH.belongs(codePoint) || CJK_IDEOGRAPH_EXTENSION_A.belongs(codePoint) || CJK_IDEOGRAPH_EXTENSION_B.belongs(codePoint) @@ -258,28 +282,31 @@ public class UnicodeCharacter implements UnicodeDisplayable { public static boolean isHangulSyllable(int codePoint) { return HANGUL_SYLLABLE.belongs(codePoint); } - public static String cjkUnifiedIdeographName(int codePoint) { - if ( ! isCjkUnifiedIdeograph(codePoint) ) - return null; + protected static String cjkIdeographName(int codePoint) { return String.format("CJK UNIFIED IDEOGRAPH-%04X", codePoint); } - public static String hangulSyllableName(int codePoint) { - if ( ! isHangulSyllable(codePoint) ) - return null; + protected static String hangulSyllableName(int codePoint) { int index = codePoint - HANGUL_SYLLABLE.getFrom(); final int tCount = 28; final int nCount = 21*tCount; int l = index/nCount; int v = (index%nCount)/tCount; int t = index%tCount; - final String[] partL = { "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", - "SS", "", "J", "JJ", "C", "K", "T", "P", "H" }; - final String[] partV = { "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", - "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI", - "I" }; - final String[] partT = { "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", - "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S", - "SS", "NG", "J", "C", "K", "T", "P", "H" }; - return String.format("HANGUL SYLLABLE %s%s%s", partL[l], partV[v], partT[t]); + final String[] partL = { + "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", + "SS", "", "J", "JJ", "C", "K", "T", "P", "H" + }; + final String[] partV = { + "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", + "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI", + "I" + }; + final String[] partT = { + "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", + "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S", + "SS", "NG", "J", "C", "K", "T", "P", "H" + }; + return String.format("HANGUL SYLLABLE %s%s%s", + partL[l], partV[v], partT[t]); } } diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java index df91010..efdf5eb 100644 --- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java +++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java @@ -1,6 +1,8 @@ package org.madore.android.unicodeMap; +import java.util.Collections; import java.util.Iterator; +import java.util.Comparator; import java.util.List; import java.util.ArrayList; import java.util.NoSuchElementException; @@ -145,27 +147,23 @@ public class UnicodeDatabase { protected final static String[] queryColumns = { "id", "name", "category" }; - public static UnicodeCharacter getSingleSpecial(int codePoint) { - if ( UnicodeCharacter.SpecialRange.isCjkUnifiedIdeograph(codePoint) ) - return new UnicodeCharacter(codePoint, - UnicodeCharacter.SpecialRange.cjkUnifiedIdeographName(codePoint), - UnicodeCharacter.Category.OTHER_LETTER); - if ( UnicodeCharacter.SpecialRange.isHangulSyllable(codePoint) ) - return new UnicodeCharacter(codePoint, - UnicodeCharacter.SpecialRange.hangulSyllableName(codePoint), - UnicodeCharacter.Category.OTHER_LETTER); - return null; + public static UnicodeCharacter getSingleSpecial(UnicodeCharacter.SpecialRange rng, + int codePoint) { + return new UnicodeCharacter(codePoint, + rng.getName(codePoint), + rng.getCategory()); } public UnicodeCharacter getSingle(int codePoint) { - UnicodeCharacter ch = getSingleSpecial(codePoint); - if ( ch != null ) - return ch; + for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() ) + if ( rng.belongs(codePoint) ) + return getSingleSpecial(rng, codePoint); final Cursor c = db.query(UNICODE_TABLE_NAME, queryColumns, "id=?", new String[] { Integer.toString(codePoint) }, null, null, null, null); + UnicodeCharacter ch; if ( c.getCount() > 0 ) { c.moveToFirst(); ch = new UnicodeCharacter(c.getInt(0), @@ -230,13 +228,51 @@ public class UnicodeDatabase { } } + private static class SpecialRangeIterable + implements Iterable { + protected final UnicodeCharacter.SpecialRange rng; + protected final int from; protected final int to; + public SpecialRangeIterable(UnicodeCharacter.SpecialRange rng, + int from, int to) { + this.rng = rng; + this.from = from; + this.to = to; + } + public Iterator iterator() { + return (new Iterator() { + int i = from; + public boolean hasNext() { return i < to; } + public UnicodeCharacter next() { + if ( i < to ) + return getSingleSpecial(rng, i++); + else + throw new NoSuchElementException(); + } + public void remove() { + throw new UnsupportedOperationException(); + } + }); + } + } + public Iterable getRange(int from, int to) { + List annoyance = null; + boolean muchAnnoyance = false; for ( UnicodeCharacter.SpecialRange rng : UnicodeCharacter.SpecialRange.values() ) { if ( rng.inside(from, to) ) { - List list = new ArrayList(to-from); - for ( int i=from ; i 0 ) { + if ( annoyance == null ) + annoyance + = new ArrayList(rng.interCount(from, to)); + else + muchAnnoyance = true; + int from0 = (fromrng.getTo())?rng.getTo():to; + Iterable rangeIt + = new SpecialRangeIterable(rng, from0, to0); + for ( UnicodeCharacter ch : rangeIt ) + annoyance.add(ch); } } final Cursor c @@ -245,6 +281,24 @@ public class UnicodeDatabase { new String[] { Integer.toString(from), Integer.toString(to) }, null, null, "id", null); + if ( annoyance != null ) { + Iterable cursorIt + = new CursorIterable(c); + for ( UnicodeCharacter ch : cursorIt ) { + annoyance.add(ch); + muchAnnoyance = true; + } + if ( muchAnnoyance ) + Collections.sort(annoyance, new Comparator() { + public int compare(UnicodeCharacter ch1, + UnicodeCharacter ch2) { + int cp1 = ch1.getCodePoint(); + int cp2 = ch2.getCodePoint(); + return (cp1cp2)?1:0; + } + }); + return annoyance; + } return new CursorIterable(c); } diff --git a/src/org/madore/android/unicodeMap/UnicodeMapActivity.java b/src/org/madore/android/unicodeMap/UnicodeMapActivity.java index b48c771..0c80741 100644 --- a/src/org/madore/android/unicodeMap/UnicodeMapActivity.java +++ b/src/org/madore/android/unicodeMap/UnicodeMapActivity.java @@ -158,6 +158,11 @@ public final class UnicodeMapActivity extends ListActivity { setListAdapter(adapter); } else if ( disp instanceof RootDisplay ) { list = new ArrayList(UnicodeCharacter.Range.values().length); + // list.add(new UnicodeRangeable() { + // public int getFrom() { return 0x4DF8; } + // public int getTo() { return 0x4E10; } + // public String getDescr() { return "TEST"; } + // }); for ( UnicodeCharacter.Range rng : UnicodeCharacter.Range.values() ) list.add(rng); disp.setListCache(list); -- cgit v1.2.3