From fa539907328dc02ba49b79f51a0e060bf42d85ee Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Sat, 24 Apr 2010 01:05:05 +0200 Subject: Make categories into an enum. Show only printable characters. --- .../android/unicodeMap/UnicodeCharacter.java | 92 ++++++++++++++++++++-- .../madore/android/unicodeMap/UnicodeDatabase.java | 5 +- 2 files changed, 90 insertions(+), 7 deletions(-) (limited to 'src/org/madore/android') diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java index fb68169..11d8d3e 100644 --- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java +++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java @@ -1,14 +1,89 @@ package org.madore.android.unicodeMap; +import java.util.Arrays; +import java.util.Set; +import java.util.Map; +import java.util.EnumSet; +import java.util.HashMap; import java.util.Formatter; public class UnicodeCharacter { + enum Category { + UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER), + LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER), + TITLECASE_LETTER("Lt", Character.TITLECASE_LETTER), + MODIFIER_LETTER("Lm", Character.MODIFIER_LETTER), + OTHER_LETTER("Lo", Character.OTHER_LETTER), + NON_SPACING_MARK("Mn", Character.NON_SPACING_MARK), + COMBINING_SPACING_MARK("Mc", Character.COMBINING_SPACING_MARK), + ENCLOSING_MARK("Me", Character.ENCLOSING_MARK), + DECIMAL_DIGIT_NUMBER("Nd", Character.DECIMAL_DIGIT_NUMBER), + LETTER_NUMBER("Nl", Character.LETTER_NUMBER), + OTHER_NUMBER("No", Character.OTHER_NUMBER), + CONNECTOR_PUNCTUATION("Pc", Character.CONNECTOR_PUNCTUATION), + DASH_PUNCTUATION("Pd", Character.DASH_PUNCTUATION), + START_PUNCTUATION("Ps", Character.START_PUNCTUATION), + END_PUNCTUATION("Pe", Character.END_PUNCTUATION), + INITIAL_QUOTE_PUNCTUATION("Pi", Character.INITIAL_QUOTE_PUNCTUATION), + FINAL_QUOTE_PUNCTUATION("Pf", Character.FINAL_QUOTE_PUNCTUATION), + OTHER_PUNCTUATION("Po", Character.OTHER_PUNCTUATION), + MATH_SYMBOL("Sm", 
Character.MATH_SYMBOL), + CURRENCY_SYMBOL("Sc", Character.CURRENCY_SYMBOL), + MODIFIER_SYMBOL("Sk", Character.MODIFIER_SYMBOL), + OTHER_SYMBOL("So", Character.OTHER_SYMBOL), + SPACE_SEPARATOR("Zs", Character.SPACE_SEPARATOR), + LINE_SEPARATOR("Zl", Character.LINE_SEPARATOR), + PARAGRAPH_SEPARATOR("Zp", Character.PARAGRAPH_SEPARATOR), + CONTROL("Cc", Character.CONTROL), + FORMAT("Cf", Character.FORMAT), + SURROGATE("Cs", Character.SURROGATE), + PRIVATE_USE("Co", Character.PRIVATE_USE), + UNASSIGNED("Cn", Character.UNASSIGNED); + public final String code; + public final byte javaValue; + Category(String code, byte javaValue) { + this.code = code; + this.javaValue = javaValue; + } + protected final static Map<String,Category> revMap + = new HashMap<String,Category>(); + static { + for ( Category cat : Category.values() ) + revMap.put(cat.code, cat); + } + static Category fromCode(String code) { + Category cat = revMap.get(code); + if ( cat == null ) + cat = UNASSIGNED; + return cat; + } + } + + protected final static Set<Category> printable; + + static { + Category[] prlist = new Category[] { + Category.UPPERCASE_LETTER, Category.LOWERCASE_LETTER, + Category.TITLECASE_LETTER, Category.MODIFIER_LETTER, + Category.OTHER_LETTER, Category.NON_SPACING_MARK, + Category.COMBINING_SPACING_MARK, Category.ENCLOSING_MARK, + Category.DECIMAL_DIGIT_NUMBER, Category.LETTER_NUMBER, + Category.OTHER_NUMBER, Category.CONNECTOR_PUNCTUATION, + Category.DASH_PUNCTUATION, Category.START_PUNCTUATION, + Category.END_PUNCTUATION, Category.INITIAL_QUOTE_PUNCTUATION, + Category.FINAL_QUOTE_PUNCTUATION, Category.OTHER_PUNCTUATION, + Category.MATH_SYMBOL, Category.CURRENCY_SYMBOL, + Category.MODIFIER_SYMBOL, Category.OTHER_SYMBOL + }; + printable = EnumSet.copyOf(Arrays.asList(prlist)); + } + protected final int codePoint; protected final String name; - protected final String category; + protected final Category category; - public UnicodeCharacter(int codePoint, 
String name, Category category) { this.codePoint = codePoint; this.name = name; this.category = category; @@ -22,15 +97,22 @@ public class UnicodeCharacter { return name; } + public boolean isPrintable() { + return printable.contains(this.category); + } + @Override public String toString() { StringBuilder s = new StringBuilder(); - s.append(Character.toChars(codePoint)); + if ( this.isPrintable() ) { + s.append(Character.toChars(codePoint)); + s.append(": "); + } Formatter fmt = new Formatter(s); if ( codePoint < 0x10000 ) - fmt.format(" (U+%04X %s)", codePoint, name); + fmt.format("U+%04X %s", codePoint, name); else - fmt.format(" (U+%X %s)", codePoint, name); + fmt.format("U+%X %s", codePoint, name); return new String(s); } diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java index 9e809f2..9dff87f 100644 --- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java +++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java @@ -101,7 +101,8 @@ public class UnicodeDatabase { if ( c.getCount() > 0 ) { c.moveToFirst(); ch = new UnicodeCharacter(c.getInt(0), - c.getString(1), c.getString(2)); + c.getString(1), + UnicodeCharacter.Category.fromCode(c.getString(2))); } else ch = null; c.close(); @@ -144,7 +145,7 @@ public class UnicodeDatabase { if ( hasNext ) { UnicodeCharacter ch = new UnicodeCharacter(c.getInt(0), c.getString(1), - c.getString(2)); + UnicodeCharacter.Category.fromCode(c.getString(2))); if ( c.isLast() ) { hasNext = false; c.close(); -- cgit v1.2.3