diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 1517dde0ce..d71b11c1cf 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -310,6 +310,7 @@ FFF0..FFFF; Specials 18800..18AFF; Tangut Components 18B00..18CFF; Khitan Small Script 18D00..18D7F; Tangut Supplement +18D80..18DFF; Tangut Components Supplement 1AFF0..1AFFF; Kana Extended-B 1B000..1B0FF; Kana Supplement 1B100..1B12F; Kana Extended-A diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e487..d3da6bd7cc 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-16, 17:24:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2041,7 +2041,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 13460..143FA ; 16.0 # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 16100..16139 ; 16.0 # [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE +187F8..187FF ; 16.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF +18D09..18D1C ; 16.0 # [20] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE @@ -2057,6 +2059,16 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5185 +# Total code points: 5213 + +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +18D80..18D81 ; 17.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 + +# Total code points: 2 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1a..41a8d0e234 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ -# DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# DerivedCoreProperties-17.0.0.txt +# Date: 2024-10-16, 17:24:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1320,9 +1320,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Alphabetic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1441,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142789 # ================================================ @@ -6852,9 +6852,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; ID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -6962,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141299 # ================================================ @@ -8222,9 +8222,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; ID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8370,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144571 # ================================================ @@ -9038,9 +9038,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; XID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -9148,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141276 # ================================================ @@ -10409,9 +10409,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; XID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10557,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144552 # ================================================ @@ -12607,9 +12607,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE0..16FE1 ; Grapheme_Base # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Grapheme_Base # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -12812,7 +12812,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152760 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea5..1f0808f8e3 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-06-07, 20:56:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2360,11 +2360,12 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; W # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3ce2582174..63652ca422 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:55 GMT +# LineBreak-17.0.0.txt +# Date: 2024-10-16, 17:24:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3270,11 +3270,12 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; ID # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index fae2831e7a..b3a671f694 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ -# PropList-16.0.0.txt -# Date: 2024-05-31, 18:09:48 GMT +# PropList-17.0.0.txt +# Date: 2024-10-16, 17:25:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -871,9 +871,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER -17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Ideographic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -885,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106477 +# Total code points: 106507 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 4358a787e6..a3b3ea0908 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-09-11, 23:38:17 GMT +# Date: 2024-10-16, 17:25:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -463,6 +463,7 @@ blk; Tamil_Sup ; Tamil_Supplement blk; Tangsa ; Tangsa blk; Tangut ; Tangut blk; Tangut_Components ; Tangut_Components +blk; Tangut_Components_Sup ; Tangut_Components_Supplement blk; Tangut_Sup ; Tangut_Supplement blk; Telugu ; Telugu blk; Thaana ; Thaana diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd6..d903a4b508 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-06-07, 20:56:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2756,11 +2756,11 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ 16FE0 ; Tangut # Lm TANGUT ITERATION MARK -17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 6914 +# Total code points: 6944 # ================================================ diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index 9c888daebb..ad9bc8c486 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -51,6 +51,8 @@ # [Grammar of the Tangut Language]. Moscow, 1968. # UTN42 = Andrew West and Viacheslav Zaytsev, Tangut Character Additions and Glyph Corrections, # Unicode Technical Note #42. 2019-12-21. +# N5217 = Andrew West, Proposal to encode 2 Tangut components and 28 Tangut ideographs, +# WG2 N5217 = L2/23-149. 2023-10-02. # # For more information, see Section 18.11, Tangut, of the # core specification. @@ -12327,6 +12329,22 @@ U+187F6 kTGT_MergedSrc UTN42-010 U+187F6 kRSTUnicode 79.14 U+187F7 kTGT_MergedSrc UTN42-011 U+187F7 kRSTUnicode 79.19 +U+187F8 kTGT_MergedSrc N5217-01 +U+187F8 kRSTUnicode 206.19 +U+187F9 kTGT_MergedSrc N5217-02 +U+187F9 kRSTUnicode 267.9 +U+187FA kTGT_MergedSrc N5217-03 +U+187FA kRSTUnicode 75.19 +U+187FB kTGT_MergedSrc N5217-04 +U+187FB kRSTUnicode 490.12 +U+187FC kTGT_MergedSrc N5217-05 +U+187FC kRSTUnicode 114.9 +U+187FD kTGT_MergedSrc N5217-06 +U+187FD kRSTUnicode 217.15 +U+187FE kTGT_MergedSrc N5217-07 +U+187FE kRSTUnicode 519.13 +U+187FF kTGT_MergedSrc N5217-08 +U+187FF kRSTUnicode 736.18 U+18D00 kTGT_MergedSrc L2008-3489 U+18D00 kRSTUnicode 17.7 U+18D01 kTGT_MergedSrc L2008-1667 @@ -12345,5 +12363,45 @@ U+18D07 kTGT_MergedSrc L2008-1106 U+18D07 kRSTUnicode 485.12 U+18D08 kTGT_MergedSrc L2008-4456 U+18D08 kRSTUnicode 674.14 +U+18D09 kTGT_MergedSrc N5217-09 +U+18D09 kRSTUnicode 590.14 +U+18D0A kTGT_MergedSrc N5217-10 +U+18D0A kRSTUnicode 267.18 +U+18D0B kTGT_MergedSrc N5217-11 +U+18D0B kRSTUnicode 267.10 +U+18D0C kTGT_MergedSrc N5217-12 +U+18D0C kRSTUnicode 655.13 +U+18D0D kTGT_MergedSrc N5217-13 +U+18D0D kRSTUnicode 456.14 +U+18D0E kTGT_MergedSrc N5217-14 +U+18D0E kRSTUnicode 273.9 +U+18D0F kTGT_MergedSrc N5217-15 +U+18D0F kRSTUnicode 278.11 +U+18D10 kTGT_MergedSrc N5217-16 +U+18D10 kRSTUnicode 106.13 +U+18D11 kTGT_MergedSrc N5217-17 +U+18D11 kRSTUnicode 75.8 +U+18D12 kTGT_MergedSrc N5217-18 +U+18D12 kRSTUnicode 17.7 +U+18D13 kTGT_MergedSrc N5217-19 +U+18D13 kRSTUnicode 106.15 +U+18D14 kTGT_MergedSrc N5217-20 +U+18D14 kRSTUnicode 167.15 +U+18D15 kTGT_MergedSrc N5217-21 +U+18D15 kRSTUnicode 462.11 +U+18D16 kTGT_MergedSrc N5217-22 +U+18D16 kRSTUnicode 579.17 +U+18D17 kTGT_MergedSrc N5217-23 +U+18D17 kRSTUnicode 210.13 +U+18D18 kTGT_MergedSrc N5217-24 +U+18D18 kRSTUnicode 278.13 +U+18D19 kTGT_MergedSrc N5217-25 +U+18D19 kRSTUnicode 141.19 +U+18D1A kTGT_MergedSrc N5217-26 +U+18D1A kRSTUnicode 75.13 +U+18D1B kTGT_MergedSrc N5217-27 +U+18D1B kRSTUnicode 36.7 +U+18D1C kTGT_MergedSrc N5217-28 +U+18D1C kRSTUnicode 141.9 # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a3739..76df07294c 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30281,7 +30281,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; -187F7;;Lo;0;L;;;;;N;;;;; +187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; 18802;TANGUT COMPONENT-003;Lo;0;L;;;;;N;;;;; @@ -31522,7 +31522,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D08;;Lo;0;L;;;;;N;;;;; +18D1C;;Lo;0;L;;;;;N;;;;; +18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; +18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; 1AFF1;KATAKANA LETTER MINNAN TONE-3;Lm;0;L;;;;;N;;;;; 1AFF2;KATAKANA LETTER MINNAN TONE-4;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd72285..0375c76715 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-06-07, 20:56:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -46,6 +46,7 @@ # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF @@ -2189,14 +2190,15 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FFF ; U # Cn [14] .. -17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -187F8..187FF ; U # Cn [8] .. +17000..187FF ; U # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFE ; U # Cn [41] .. 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D08 ; U # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -18D09..18D7F ; U # Cn [119] .. +18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D1D..18D7F ; U # Cn [99] .. +18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D82..18DFF ; U # Cn [126] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ca3689e6bc..be969da594 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:32 GMT +# SentenceBreakProperty-17.0.0.txt +# Date: 2024-10-16, 17:25:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2510,9 +2510,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; OLetter # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; OLetter # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2585,7 +2585,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136938 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa0..7775cb4844 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# DerivedBidiClass-17.0.0.txt +# Date: 2024-10-16, 17:24:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1103,9 +1103,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; L # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1214,7 +1214,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815321 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96e..66b1fbccd5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ -# DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# DerivedCombiningClass-17.0.0.txt +# Date: 2024-10-16, 17:24:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1839,9 +1839,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE2 ; 0 # Po OLD CHINESE HOOK MARK 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER -17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; 0 # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; 0 # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; 0 # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 821551 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaac..3475888cc1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ -# DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedEastAsianWidth-17.0.0.txt +# Date: 2024-10-16, 17:24:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2103,8 +2103,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. -# Total code points: 792420 +# The above property value applies to 761069 code points not listed here. +# Total code points: 792390 # ================================================ @@ -2508,9 +2508,9 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; W # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; W # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; W # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2580,7 +2580,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182615 +# Total code points: 182645 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca93..2fbaf6570e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ -# DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedGeneralCategory-17.0.0.txt +# Date: 2024-10-16, 17:24:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -579,9 +579,9 @@ FFFE..FFFF ; Cn # [2] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. 16FF2..16FFF ; Cn # [14] .. -187F8..187FF ; Cn # [8] .. 18CD6..18CFE ; Cn # [41] .. -18D09..1AFEF ; Cn # [8935] .. +18D1D..18D7F ; Cn # [99] .. +18D82..1AFEF ; Cn # [8814] .. 1AFF4 ; Cn # 1AFFC ; Cn # 1AFFF ; Cn # @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819503 # ================================================ @@ -2639,9 +2639,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16D43..16D6A ; Lo # [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION -17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Lo # [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Lo # [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2708,7 +2708,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136477 +# Total code points: 136507 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 31d143e928..9d769f8fab 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:50 GMT +# DerivedLineBreak-17.0.0.txt +# Date: 2024-10-16, 17:24:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757623 code points not listed here. +# Total code points: 895091 # ================================================ @@ -1762,9 +1762,9 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA 113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1F000..1F02B ; ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK @@ -1853,7 +1853,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172421 +# Total code points: 172451 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b55..6b2c9d4643 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ -# DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedName-17.0.0.txt +# Date: 2024-10-16, 17:24:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36951,7 +36951,7 @@ FFFD ; REPLACEMENT CHARACTER 16FE4 ; KHITAN SMALL SCRIPT FILLER 16FF0 ; VIETNAMESE ALTERNATE READING MARK CA 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; TANGUT IDEOGRAPH-* +17000..187FF ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 18802 ; TANGUT COMPONENT-003 @@ -37722,7 +37722,9 @@ FFFD ; REPLACEMENT CHARACTER 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* 18CFF ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D08 ; TANGUT IDEOGRAPH-* +18D00..18D1C ; TANGUT IDEOGRAPH-* +18D80 ; TANGUT COMPONENT-769 +18D81 ; TANGUT COMPONENT-770 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 1AFF1 ; KATAKANA LETTER MINNAN TONE-3 1AFF2 ; KATAKANA LETTER MINNAN TONE-4 @@ -45367,6 +45369,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155028 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 55107dc2aa..21e74b88e6 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -488,6 +488,7 @@ public enum Block_Values implements Named { Tangsa("Tangsa"), Tangut("Tangut"), Tangut_Components("Tangut_Components"), + Tangut_Components_Supplement("Tangut_Components_Sup"), Tangut_Supplement("Tangut_Sup"), Telugu("Telugu"), Thaana("Thaana"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index 7fea779e69..a3ea48f261 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -724,6 +724,7 @@ private static void generateUnicodeData(String filename) throws IOException { final BagFormatter bf = new BagFormatter(); bf.setHexValue(false) .setMergeRanges(true) + .setRangeBreakSource(null) .setNoSpacesBeforeSemicolon() .setMinSpacesAfterSemicolon(0) .setUnicodeDataStyleRanges(true) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index ec0abca407..a3ae7da1dc 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1309,6 +1309,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 12 added TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7. return TANGUT_BASE; } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x187FF && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. + return TANGUT_BASE; + } } if (rCompositeVersion >= 0xd0000) { @@ -1321,6 +1326,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { if (ch <= 0x18D08) { return TANGUT_SUP_BASE; // 18D00..18D08 Tangut Ideograph Supplement } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + return TANGUT_SUP_BASE; + } } // 20000..2A6DF; CJK Unified Ideographs Extension B diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index b1ffb8261a..70c9ea186f 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -28,10 +28,16 @@ public interface UCD_Types { // Unicode 12: // 17000;;Lo;0;L;;;;;N;;;;; // 187F7;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 17000;;Lo;0;L;;;;;N;;;;; + // 187FF;;Lo;0;L;;;;;N;;;;; public static final int TANGUT_SUP_BASE = 0x18D00; // Unicode 13: // 18D00;;Lo;0;L;;;;;N;;;;; // 18D08;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 18D00;;Lo;0;L;;;;;N;;;;; + // 18D1C;;Lo;0;L;;;;;N;;;;; public static final int // 4E00;;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 357d1bf6df..9ba202a241 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -873,6 +873,7 @@ Format: kenFile skipValue=Rotated # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index eaa03a0f7e..8f86e455f7 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -308,6 +308,7 @@ Tamil ; Tamil Tamil_Sup ; Tamil_Supplement Tangut ; Tangut Tangut_Components ; Tangut_Components +Tangut_Components_Sup ; Tangut_Components_Supplement Tangut_Sup ; Tangut_Supplement Telugu ; Telugu Thaana ; Thaana