From 5ae584e2502efe74c53ee4f9c8d61872a9c89c36 Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Fri, 16 Mar 2012 16:18:59 +0100 Subject: Upgrade database to Unicode 6.1. --- .../android/unicodeMap/UnicodeCharacter.java | 40 ++++++++++++++++++---- .../madore/android/unicodeMap/UnicodeDatabase.java | 4 +-- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'src/org') diff --git a/src/org/madore/android/unicodeMap/UnicodeCharacter.java b/src/org/madore/android/unicodeMap/UnicodeCharacter.java index e79a982..0228644 100644 --- a/src/org/madore/android/unicodeMap/UnicodeCharacter.java +++ b/src/org/madore/android/unicodeMap/UnicodeCharacter.java @@ -32,6 +32,8 @@ public class UnicodeCharacter implements UnicodeDisplayable { THAANA(0x0780, 0x07BF, "Thaana"), NKO(0x07C0, 0x07FF, "NKo"), SAMARITAN(0x0800, 0x083F, "Samaritan"), + MANDAIC(0x0840, 0x085F, "Mandaic"), + ARABIC_EXTENDED_A(0x08A0, 0x08FF, "Arabic Extended-A"), DEVANAGARI(0x0900, 0x097F, "Devanagari"), BENGALI(0x0980, 0x09FF, "Bengali"), GURMUKHI(0x0A00, 0x0A7F, "Gurmukhi"), @@ -69,8 +71,10 @@ public class UnicodeCharacter implements UnicodeDisplayable { TAI_THAM(0x1A20, 0x1AAF, "Tai Tham"), BALINESE(0x1B00, 0x1B7F, "Balinese"), SUNDANESE(0x1B80, 0x1BBF, "Sundanese"), + BATAK(0x1BC0, 0x1BFF, "Batak"), LEPCHA(0x1C00, 0x1C4F, "Lepcha"), OL_CHIKI(0x1C50, 0x1C7F, "Ol Chiki"), + SUNDANESE_SUPPLEMENT(0x1CC0, 0x1CCF, "Sundanese Supplement"), VEDIC_EXTENSIONS(0x1CD0, 0x1CFF, "Vedic Extensions"), PHONETIC_EXTENSIONS(0x1D00, 0x1D7F, "Phonetic Extensions"), PHONETIC_EXTENSIONS_SUPPLEMENT(0x1D80, 0x1DBF, "Phonetic Extensions Supplement"), @@ -123,9 +127,9 @@ public class UnicodeCharacter implements UnicodeDisplayable { KATAKANA_PHONETIC_EXTENSIONS(0x31F0, 0x31FF, "Katakana Phonetic Extensions"), ENCLOSED_CJK_LETTERS_AND_MONTHS(0x3200, 0x32FF, "Enclosed CJK Letters and Months"), CJK_COMPATIBILITY(0x3300, 0x33FF, "CJK Compatibility"), - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A(0x3400, 0x4DBF, "CJK Unified Ideographs Extension A"), + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A(0x3400, 0x4DB5, "CJK Unified Ideographs Extension A"), YIJING_HEXAGRAM_SYMBOLS(0x4DC0, 0x4DFF, "Yijing Hexagram Symbols"), - CJK_UNIFIED_IDEOGRAPHS(0x4E00, 0x9FFF, "CJK Unified Ideographs"), + CJK_UNIFIED_IDEOGRAPHS(0x4E00, 0x9FCC, "CJK Unified Ideographs"), YI_SYLLABLES(0xA000, 0xA48F, "Yi Syllables"), YI_RADICALS(0xA490, 0xA4CF, "Yi Radicals"), LISU(0xA4D0, 0xA4FF, "Lisu"), @@ -146,8 +150,10 @@ public class UnicodeCharacter implements UnicodeDisplayable { CHAM(0xAA00, 0xAA5F, "Cham"), MYANMAR_EXTENDED_A(0xAA60, 0xAA7F, "Myanmar Extended-A"), TAI_VIET(0xAA80, 0xAADF, "Tai Viet"), + MEETEI_MAYEK_EXTENSIONS(0xAAE0, 0xAAFF, "Meetei Mayek Extensions"), + ETHIOPIC_EXTENDED_A(0xAB00, 0xAB2F, "Ethiopic Extended-A"), MEETEI_MAYEK(0xABC0, 0xABFF, "Meetei Mayek"), - HANGUL_SYLLABLES(0xAC00, 0xD7AF, "Hangul Syllables"), + HANGUL_SYLLABLES(0xAC00, 0xD7A3, "Hangul Syllables"), HANGUL_JAMO_EXTENDED_B(0xD7B0, 0xD7FF, "Hangul Jamo Extended-B"), HIGH_SURROGATES(0xD800, 0xDB7F, "High Surrogates"), HIGH_PRIVATE_USE_SURROGATES(0xDB80, 0xDBFF, "High Private Use Surrogates"), @@ -183,6 +189,8 @@ public class UnicodeCharacter implements UnicodeDisplayable { IMPERIAL_ARAMAIC(0x10840, 0x1085F, "Imperial Aramaic"), PHOENICIAN(0x10900, 0x1091F, "Phoenician"), LYDIAN(0x10920, 0x1093F, "Lydian"), + MEROITIC_HIEROGLYPHS(0x10980, 0x1099F, "Meroitic Hieroglyphs"), + MEROITIC_CURSIVE(0x109A0, 0x109FF, "Meroitic Cursive"), KHAROSHTHI(0x10A00, 0x10A5F, "Kharoshthi"), OLD_SOUTH_ARABIAN(0x10A60, 0x10A7F, "Old South Arabian"), AVESTAN(0x10B00, 0x10B3F, "Avestan"), @@ -190,22 +198,37 @@ public class UnicodeCharacter implements UnicodeDisplayable { INSCRIPTIONAL_PAHLAVI(0x10B60, 0x10B7F, "Inscriptional Pahlavi"), OLD_TURKIC(0x10C00, 0x10C4F, "Old Turkic"), RUMI_NUMERAL_SYMBOLS(0x10E60, 0x10E7F, "Rumi Numeral Symbols"), + BRAHMI(0x11000, 0x1107F, "Brahmi"), KAITHI(0x11080, 0x110CF, "Kaithi"), + SORA_SOMPENG(0x110D0, 0x110FF, "Sora Sompeng"), + CHAKMA(0x11100, 0x1114F, "Chakma"), + SHARADA(0x11180, 0x111DF, "Sharada"), + TAKRI(0x11680, 0x116CF, "Takri"), CUNEIFORM(0x12000, 0x123FF, "Cuneiform"), CUNEIFORM_NUMBERS_AND_PUNCTUATION(0x12400, 0x1247F, "Cuneiform Numbers and Punctuation"), EGYPTIAN_HIEROGLYPHS(0x13000, 0x1342F, "Egyptian Hieroglyphs"), + BAMUM_SUPPLEMENT(0x16800, 0x16A3F, "Bamum Supplement"), + MIAO(0x16F00, 0x16F9F, "Miao"), + KANA_SUPPLEMENT(0x1B000, 0x1B0FF, "Kana Supplement"), BYZANTINE_MUSICAL_SYMBOLS(0x1D000, 0x1D0FF, "Byzantine Musical Symbols"), MUSICAL_SYMBOLS(0x1D100, 0x1D1FF, "Musical Symbols"), ANCIENT_GREEK_MUSICAL_NOTATION(0x1D200, 0x1D24F, "Ancient Greek Musical Notation"), TAI_XUAN_JING_SYMBOLS(0x1D300, 0x1D35F, "Tai Xuan Jing Symbols"), COUNTING_ROD_NUMERALS(0x1D360, 0x1D37F, "Counting Rod Numerals"), MATHEMATICAL_ALPHANUMERIC_SYMBOLS(0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols"), + ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS(0x1EE00, 0x1EEFF, "Arabic Mathematical Alphabetic Symbols"), MAHJONG_TILES(0x1F000, 0x1F02F, "Mahjong Tiles"), DOMINO_TILES(0x1F030, 0x1F09F, "Domino Tiles"), + PLAYING_CARDS(0x1F0A0, 0x1F0FF, "Playing Cards"), ENCLOSED_ALPHANUMERIC_SUPPLEMENT(0x1F100, 0x1F1FF, "Enclosed Alphanumeric Supplement"), ENCLOSED_IDEOGRAPHIC_SUPPLEMENT(0x1F200, 0x1F2FF, "Enclosed Ideographic Supplement"), - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B(0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B"), - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C(0x2A700, 0x2B73F, "CJK Unified Ideographs Extension C"), + MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS(0x1F300, 0x1F5FF, "Miscellaneous Symbols and Pictographs"), + EMOTICONS(0x1F600, 0x1F64F, "Emoticons"), + TRANSPORT_AND_MAP_SYMBOLS(0x1F680, 0x1F6FF, "Transport and Map Symbols"), + ALCHEMICAL_SYMBOLS(0x1F700, 0x1F77F, "Alchemical Symbols"), + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B(0x20000, 0x2A6D6, "CJK Unified Ideographs Extension B"), + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C(0x2A700, 0x2B734, "CJK Unified Ideographs Extension C"), + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D(0x2B740, 0x2B81D, "CJK Unified Ideographs Extension D"), CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT(0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement"), TAGS(0xE0000, 0xE007F, "Tags"), VARIATION_SELECTORS_SUPPLEMENT(0xE0100, 0xE01EF, "Variation Selectors Supplement"), @@ -238,7 +261,7 @@ public class UnicodeCharacter implements UnicodeDisplayable { return cjkIdeographName(codePoint); } }, - CJK_IDEOGRAPH(0x4E00, 0x9FCB, Category.OTHER_LETTER) { + CJK_IDEOGRAPH(0x4E00, 0x9FCC, Category.OTHER_LETTER) { public String getName(int codePoint) { return cjkIdeographName(codePoint); } @@ -257,6 +280,11 @@ public class UnicodeCharacter implements UnicodeDisplayable { public String getName(int codePoint) { return cjkIdeographName(codePoint); } + }, + CJK_IDEOGRAPH_EXTENSION_D(0x2B740, 0x2B81D, Category.OTHER_LETTER) { + public String getName(int codePoint) { + return cjkIdeographName(codePoint); + } }; protected final int from; protected final int to; protected Category category; diff --git a/src/org/madore/android/unicodeMap/UnicodeDatabase.java b/src/org/madore/android/unicodeMap/UnicodeDatabase.java index f453a8d..13dd85e 100644 --- a/src/org/madore/android/unicodeMap/UnicodeDatabase.java +++ b/src/org/madore/android/unicodeMap/UnicodeDatabase.java @@ -26,7 +26,7 @@ import android.database.sqlite.SQLiteOpenHelper; public class UnicodeDatabase { protected static final String DATABASE_NAME = "unicode.db"; - protected static final int DATABASE_VERSION = 2; + protected static final int DATABASE_VERSION = 3; protected static final String UNICODE_TABLE_NAME = "unicode"; protected boolean needPopulate; @@ -108,7 +108,7 @@ public class UnicodeDatabase { "UnicodeData-12.txt", "UnicodeData-13.txt", "UnicodeData-14.txt", "UnicodeData-15.txt", "UnicodeData-16.txt", "UnicodeData-16b.txt", "UnicodeData-17.txt", - "UnicodeData-18.txt", "UnicodeData-19.txt" + "UnicodeData-18.txt", "UnicodeData-18b.txt", "UnicodeData-19.txt" }; try { for ( int cnt=0 ; cnt