summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/org/madore/android/unicodeMap/UnicodeCharacter.java 92
-rw-r--r-- src/org/madore/android/unicodeMap/UnicodeDatabase.java 5
2 files changed, 90 insertions, 7 deletions
diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java
index fb68169..11d8d3e 100644
--- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java
+++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java
@@ -1,14 +1,89 @@
package org.madore.android.unicodeMap;
+import java.util.Arrays;
+import java.util.Set;
+import java.util.Map;
+import java.util.EnumSet;
+import java.util.HashMap;
import java.util.Formatter;
public class UnicodeCharacter {
+ enum Category {
+ UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER),
+ LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER),
+ TITLECASE_LETTER("Lt", Character.TITLECASE_LETTER),
+ MODIFIER_LETTER("Lm", Character.MODIFIER_LETTER),
+ OTHER_LETTER("Lo", Character.OTHER_LETTER),
+ NON_SPACING_MARK("Mn", Character.NON_SPACING_MARK),
+ COMBINING_SPACING_MARK("Mc", Character.COMBINING_SPACING_MARK),
+ ENCLOSING_MARK("Me", Character.ENCLOSING_MARK),
+ DECIMAL_DIGIT_NUMBER("Nd", Character.DECIMAL_DIGIT_NUMBER),
+ LETTER_NUMBER("Nl", Character.LETTER_NUMBER),
+ OTHER_NUMBER("No", Character.OTHER_NUMBER),
+ CONNECTOR_PUNCTUATION("Pc", Character.CONNECTOR_PUNCTUATION),
+ DASH_PUNCTUATION("Pd", Character.DASH_PUNCTUATION),
+ START_PUNCTUATION("Ps", Character.START_PUNCTUATION),
+ END_PUNCTUATION("Pe", Character.END_PUNCTUATION),
+ INITIAL_QUOTE_PUNCTUATION("Pi", Character.INITIAL_QUOTE_PUNCTUATION),
+ FINAL_QUOTE_PUNCTUATION("Pf", Character.FINAL_QUOTE_PUNCTUATION),
+ OTHER_PUNCTUATION("Po", Character.OTHER_PUNCTUATION),
+ MATH_SYMBOL("Sm", Character.MATH_SYMBOL),
+ CURRENCY_SYMBOL("Sc", Character.CURRENCY_SYMBOL),
+ MODIFIER_SYMBOL("Sk", Character.MODIFIER_SYMBOL),
+ OTHER_SYMBOL("So", Character.OTHER_SYMBOL),
+ SPACE_SEPARATOR("Zs", Character.SPACE_SEPARATOR),
+ LINE_SEPARATOR("Zl", Character.LINE_SEPARATOR),
+ PARAGRAPH_SEPARATOR("Zp", Character.PARAGRAPH_SEPARATOR),
+ CONTROL("Cc", Character.CONTROL),
+ FORMAT("Cf", Character.FORMAT),
+ SURROGATE("Cs", Character.SURROGATE),
+ PRIVATE_USE("Co", Character.PRIVATE_USE),
+ UNASSIGNED("Cn", Character.UNASSIGNED);
+ public final String code;
+ public final byte javaValue;
+ Category(String code, byte javaValue) {
+ this.code = code;
+ this.javaValue = javaValue;
+ }
+ protected final static Map<String,Category> revMap
+ = new HashMap<String,Category>();
+ static {
+ for ( Category cat : Category.values() )
+ revMap.put(cat.code, cat);
+ }
+ static Category fromCode(String code) {
+ Category cat = revMap.get(code);
+ if ( cat == null )
+ cat = UNASSIGNED;
+ return cat;
+ }
+ }
+
+ protected final static Set<Category> printable;
+
+ static {
+ Category[] prlist = new Category[] {
+ Category.UPPERCASE_LETTER, Category.LOWERCASE_LETTER,
+ Category.TITLECASE_LETTER, Category.MODIFIER_LETTER,
+ Category.OTHER_LETTER, Category.NON_SPACING_MARK,
+ Category.COMBINING_SPACING_MARK, Category.ENCLOSING_MARK,
+ Category.DECIMAL_DIGIT_NUMBER, Category.LETTER_NUMBER,
+ Category.OTHER_NUMBER, Category.CONNECTOR_PUNCTUATION,
+ Category.DASH_PUNCTUATION, Category.START_PUNCTUATION,
+ Category.END_PUNCTUATION, Category.INITIAL_QUOTE_PUNCTUATION,
+ Category.FINAL_QUOTE_PUNCTUATION, Category.OTHER_PUNCTUATION,
+ Category.MATH_SYMBOL, Category.CURRENCY_SYMBOL,
+ Category.MODIFIER_SYMBOL, Category.OTHER_SYMBOL
+ };
+ printable = EnumSet.copyOf(Arrays.asList(prlist));
+ }
+
protected final int codePoint;
protected final String name;
- protected final String category;
+ protected final Category category;
- public UnicodeCharacter(int codePoint, String name, String category) {
+ public UnicodeCharacter(int codePoint, String name, Category category) {
this.codePoint = codePoint;
this.name = name;
this.category = category;
@@ -22,15 +97,22 @@ public class UnicodeCharacter {
return name;
}
+ public boolean isPrintable() {
+ return printable.contains(this.category);
+ }
+
@Override
public String toString() {
StringBuilder s = new StringBuilder();
- s.append(Character.toChars(codePoint));
+ if ( this.isPrintable() ) {
+ s.append(Character.toChars(codePoint));
+ s.append(": ");
+ }
Formatter fmt = new Formatter(s);
if ( codePoint < 0x10000 )
- fmt.format(" (U+%04X %s)", codePoint, name);
+ fmt.format("U+%04X %s", codePoint, name);
else
- fmt.format(" (U+%X %s)", codePoint, name);
+ fmt.format("U+%X %s", codePoint, name);
return new String(s);
}
diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java
index 9e809f2..9dff87f 100644
--- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java
+++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java
@@ -101,7 +101,8 @@ public class UnicodeDatabase {
if ( c.getCount() > 0 ) {
c.moveToFirst();
ch = new UnicodeCharacter(c.getInt(0),
- c.getString(1), c.getString(2));
+ c.getString(1),
+ UnicodeCharacter.Category.fromCode(c.getString(2)));
} else
ch = null;
c.close();
@@ -144,7 +145,7 @@ public class UnicodeDatabase {
if ( hasNext ) {
UnicodeCharacter ch
= new UnicodeCharacter(c.getInt(0), c.getString(1),
- c.getString(2));
+ UnicodeCharacter.Category.fromCode(c.getString(2)));
if ( c.isLast() ) {
hasNext = false;
c.close();