package org.madore.android.unicodeMap; import java.util.Arrays; import java.util.Set; import java.util.Map; import java.util.EnumSet; import java.util.HashMap; import java.util.Formatter; public class UnicodeCharacter { enum Category { UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER), LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER), TITLECASE_LETTER("Lt", Character.TITLECASE_LETTER), MODIFIER_LETTER("Lm", Character.MODIFIER_LETTER), OTHER_LETTER("Lo", Character.OTHER_LETTER), NON_SPACING_MARK("Mn", Character.NON_SPACING_MARK), COMBINING_SPACING_MARK("Mc", Character.COMBINING_SPACING_MARK), ENCLOSING_MARK("Me", Character.ENCLOSING_MARK), DECIMAL_DIGIT_NUMBER("Nd", Character.DECIMAL_DIGIT_NUMBER), LETTER_NUMBER("Nl", Character.LETTER_NUMBER), OTHER_NUMBER("No", Character.OTHER_NUMBER), CONNECTOR_PUNCTUATION("Pc", Character.CONNECTOR_PUNCTUATION), DASH_PUNCTUATION("Pd", Character.DASH_PUNCTUATION), START_PUNCTUATION("Ps", Character.START_PUNCTUATION), END_PUNCTUATION("Pe", Character.END_PUNCTUATION), INITIAL_QUOTE_PUNCTUATION("Pi", Character.INITIAL_QUOTE_PUNCTUATION), FINAL_QUOTE_PUNCTUATION("Pf", Character.FINAL_QUOTE_PUNCTUATION), OTHER_PUNCTUATION("Po", Character.OTHER_PUNCTUATION), MATH_SYMBOL("Sm", Character.MATH_SYMBOL), CURRENCY_SYMBOL("Sc", Character.CURRENCY_SYMBOL), MODIFIER_SYMBOL("Sk", Character.MODIFIER_SYMBOL), OTHER_SYMBOL("So", Character.OTHER_SYMBOL), SPACE_SEPARATOR("Zs", Character.SPACE_SEPARATOR), LINE_SEPARATOR("Zl", Character.LINE_SEPARATOR), PARAGRAPH_SEPARATOR("Zp", Character.PARAGRAPH_SEPARATOR), CONTROL("Cc", Character.CONTROL), FORMAT("Cf", Character.FORMAT), SURROGATE("Cs", Character.SURROGATE), PRIVATE_USE("Co", Character.PRIVATE_USE), UNASSIGNED("Cn", Character.UNASSIGNED); public final String code; public final byte javaValue; Category(String code, byte javaValue) { this.code = code; this.javaValue = javaValue; } protected final static Map revMap = new HashMap(); static { for ( Category cat : Category.values() ) revMap.put(cat.code, cat); } static Category fromCode(String code) { Category cat = revMap.get(code); if ( cat == null ) cat = UNASSIGNED; return cat; } } protected final static Set printable; static { Category[] prlist = new Category[] { Category.UPPERCASE_LETTER, Category.LOWERCASE_LETTER, Category.TITLECASE_LETTER, Category.MODIFIER_LETTER, Category.OTHER_LETTER, Category.NON_SPACING_MARK, Category.COMBINING_SPACING_MARK, Category.ENCLOSING_MARK, Category.DECIMAL_DIGIT_NUMBER, Category.LETTER_NUMBER, Category.OTHER_NUMBER, Category.CONNECTOR_PUNCTUATION, Category.DASH_PUNCTUATION, Category.START_PUNCTUATION, Category.END_PUNCTUATION, Category.INITIAL_QUOTE_PUNCTUATION, Category.FINAL_QUOTE_PUNCTUATION, Category.OTHER_PUNCTUATION, Category.MATH_SYMBOL, Category.CURRENCY_SYMBOL, Category.MODIFIER_SYMBOL, Category.OTHER_SYMBOL }; printable = EnumSet.copyOf(Arrays.asList(prlist)); } protected final int codePoint; protected final String name; protected final Category category; public UnicodeCharacter(int codePoint, String name, Category category) { this.codePoint = codePoint; this.name = name; this.category = category; } public int getCodePoint() { return codePoint; } public String getName() { return name; } public boolean isPrintable() { return printable.contains(this.category); } @Override public String toString() { StringBuilder s = new StringBuilder(); if ( this.isPrintable() ) { s.append(Character.toChars(codePoint)); s.append(": "); } Formatter fmt = new Formatter(s); if ( codePoint < 0x10000 ) fmt.format("U+%04X %s", codePoint, name); else fmt.format("U+%X %s", codePoint, name); return new String(s); } }