From 5d745c92f1cb18a06e9ecd2b2cc2913c9d3c3059 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 24 Apr 2014 15:29:47 +0700 Subject: [PATCH 01/15] Introduce syllable.py --- bogo/syllable.py | 49 ++++++++++++++++++++++++++++++++++++++ bogo/test/test_syllable.py | 35 +++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 bogo/syllable.py create mode 100644 bogo/test/test_syllable.py diff --git a/bogo/syllable.py b/bogo/syllable.py new file mode 100644 index 0000000..d34e137 --- /dev/null +++ b/bogo/syllable.py @@ -0,0 +1,49 @@ +import collections +from bogo import utils + + +Syllable = \ + collections.namedtuple('Syllable', + ['initial_consonant', 'vowel', 'final_consonant']) + + +def parse_syllable(string): + """\ + Make a Syllable from a string. + + Args: + - string: the string to be parsed + + Returns: + a Syllable + + >>> parse_syllable('tuong') + ('t','uo','ng') + >>> parse_syllable('ohmyfkinggod') + ('ohmyfkingg','o','d') + """ + + def atomic_separate(string, last_chars, last_is_vowel): + if string == "" or (last_is_vowel != utils.is_vowel(string[-1])): + return (string, last_chars) + else: + return atomic_separate(string[:-1], + string[-1] + last_chars, last_is_vowel) + + head, last_consonant = atomic_separate(string, "", False) + first_consonant, vowel = atomic_separate(head, "", True) + + if last_consonant and not (vowel + first_consonant): + first_consonant = last_consonant + last_consonant = '' + + # 'gi' and 'qu' are considered qualified consonants. + # We want something like this: + # ['g', 'ia', ''] -> ['gi', 'a', ''] + # ['q', 'ua', ''] -> ['qu', 'a', ''] + if len(vowel) > 1 and \ + (first_consonant + vowel[0]).lower() in ['gi', 'qu']: + first_consonant += vowel[0] + vowel = vowel[1:] + + return Syllable(first_consonant, vowel, last_consonant) diff --git a/bogo/test/test_syllable.py b/bogo/test/test_syllable.py new file mode 100644 index 0000000..42fc688 --- /dev/null +++ b/bogo/test/test_syllable.py @@ -0,0 +1,35 @@ +from nose.tools import eq_ +from bogo.syllable import Syllable, parse_syllable + + +class TestSyllable(): + + def test_parse_simple_syllable(self): + parsed = parse_syllable('tuong') + + expected = Syllable('t', 'uo', 'ng') + eq_(parsed, expected) + + def test_parse_qua(self): + parsed = parse_syllable('qua') + + expected = Syllable('qu', 'a', '') + eq_(parsed, expected) + + def test_parse_gia(self): + parsed = parse_syllable('gia') + + expected = Syllable('gi', 'a', '') + eq_(parsed, expected) + + def test_parse_gi(self): + parsed = parse_syllable('gi') + + expected = Syllable('g', 'i', '') + eq_(parsed, expected) + + def test_parse_rubbish(self): + parsed = parse_syllable('ohmyfkinggod') + + expected = Syllable('ohmyfkingg', 'o', 'd') + eq_(parsed, expected) From 2efcfc5b7ec8d1686b54405208d6ecc9339d6c3d Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sat, 26 Apr 2014 00:24:26 +0700 Subject: [PATCH 02/15] Refactor Syllable --- bogo/syllable.py | 109 ++++++++++++++++++++++++------------- bogo/test/test_syllable.py | 47 ++++++++++++++-- 2 files changed, 112 insertions(+), 44 deletions(-) diff --git a/bogo/syllable.py b/bogo/syllable.py index d34e137..b4a152f 100644 --- a/bogo/syllable.py +++ b/bogo/syllable.py @@ -2,48 +2,81 @@ from bogo import utils -Syllable = \ - collections.namedtuple('Syllable', - ['initial_consonant', 'vowel', 'final_consonant']) +class Syllable(collections.namedtuple('Syllable', + ['initial_consonant', 'vowel', 'final_consonant'])): + @staticmethod + def new_from_string(string): + """\ + Make a Syllable from a string. -def parse_syllable(string): - """\ - Make a Syllable from a string. + Args: + - string: the string to be parsed - Args: - - string: the string to be parsed + Returns: + a Syllable - Returns: - a Syllable + >>> parse_syllable('tuong') + ('t','uo','ng') + >>> parse_syllable('ohmyfkinggod') + ('ohmyfkingg','o','d') + """ + def atomic_separate(string, last_chars, last_is_vowel): + if string == "" or (last_is_vowel != utils.is_vowel(string[-1])): + return (string, last_chars) + else: + return atomic_separate(string[:-1], + string[-1] + last_chars, last_is_vowel) - >>> parse_syllable('tuong') - ('t','uo','ng') - >>> parse_syllable('ohmyfkinggod') - ('ohmyfkingg','o','d') - """ + head, last_consonant = atomic_separate(string, "", False) + first_consonant, vowel = atomic_separate(head, "", True) - def atomic_separate(string, last_chars, last_is_vowel): - if string == "" or (last_is_vowel != utils.is_vowel(string[-1])): - return (string, last_chars) + if last_consonant and not (vowel + first_consonant): + first_consonant = last_consonant + last_consonant = '' + + # 'gi' and 'qu' are considered qualified consonants. + # We want something like this: + # ['g', 'ia', ''] -> ['gi', 'a', ''] + # ['q', 'ua', ''] -> ['qu', 'a', ''] + if len(vowel) > 1 and \ + (first_consonant + vowel[0]).lower() in ['gi', 'qu']: + first_consonant += vowel[0] + vowel = vowel[1:] + + return Syllable(first_consonant, vowel, last_consonant) + + + def append_char(self, char): + """ + Append a character to `comps` following this rule: a vowel is added + to the vowel part if there is no last consonant, else to the last + consonant part; a consonant is added to the first consonant part + if there is no vowel, and to the last consonant part if the + vowel part is not empty. + + >>> transform(['', '', '']) + ['c', '', ''] + >>> transform(['c', '', ''], '+o') + ['c', 'o', ''] + >>> transform(['c', 'o', ''], '+n') + ['c', 'o', 'n'] + >>> transform(['c', 'o', 'n'], '+o') + ['c', 'o', 'no'] + """ + initial_consonant = self.initial_consonant + vowel = self.vowel + final_consonant = self.final_consonant + + if utils.is_vowel(char): + if not self.final_consonant: + vowel = self.vowel + char + else: + final_consonant = self.final_consonant + char else: - return atomic_separate(string[:-1], - string[-1] + last_chars, last_is_vowel) - - head, last_consonant = atomic_separate(string, "", False) - first_consonant, vowel = atomic_separate(head, "", True) - - if last_consonant and not (vowel + first_consonant): - first_consonant = last_consonant - last_consonant = '' - - # 'gi' and 'qu' are considered qualified consonants. - # We want something like this: - # ['g', 'ia', ''] -> ['gi', 'a', ''] - # ['q', 'ua', ''] -> ['qu', 'a', ''] - if len(vowel) > 1 and \ - (first_consonant + vowel[0]).lower() in ['gi', 'qu']: - first_consonant += vowel[0] - vowel = vowel[1:] - - return Syllable(first_consonant, vowel, last_consonant) + if not self.final_consonant and not self.vowel: + initial_consonant = self.initial_consonant + char + else: + final_consonant = self.final_consonant + char + + return Syllable(initial_consonant, vowel, final_consonant) diff --git a/bogo/test/test_syllable.py b/bogo/test/test_syllable.py index 42fc688..15821c6 100644 --- a/bogo/test/test_syllable.py +++ b/bogo/test/test_syllable.py @@ -1,35 +1,70 @@ from nose.tools import eq_ -from bogo.syllable import Syllable, parse_syllable +from bogo.syllable import Syllable class TestSyllable(): def test_parse_simple_syllable(self): - parsed = parse_syllable('tuong') + parsed = Syllable.new_from_string('tuong') expected = Syllable('t', 'uo', 'ng') eq_(parsed, expected) def test_parse_qua(self): - parsed = parse_syllable('qua') + parsed = Syllable.new_from_string('qua') expected = Syllable('qu', 'a', '') eq_(parsed, expected) def test_parse_gia(self): - parsed = parse_syllable('gia') + parsed = Syllable.new_from_string('gia') expected = Syllable('gi', 'a', '') eq_(parsed, expected) def test_parse_gi(self): - parsed = parse_syllable('gi') + parsed = Syllable.new_from_string('gi') expected = Syllable('g', 'i', '') eq_(parsed, expected) def test_parse_rubbish(self): - parsed = parse_syllable('ohmyfkinggod') + parsed = Syllable.new_from_string('ohmyfkinggod') expected = Syllable('ohmyfkingg', 'o', 'd') eq_(parsed, expected) + + def test_append_initial_consonant(self): + s = Syllable('c', '', '') + s = s.append_char('c') + + expected = Syllable('cc', '', '') + eq_(s, expected) + + def test_append_initial_consonant_empty(self): + s = Syllable('', '', '') + s = s.append_char('c') + + expected = Syllable('c', '', '') + eq_(s, expected) + + def test_append_vowel(self): + s = Syllable('c', 'a', '') + s = s.append_char('a') + + expected = Syllable('c', 'aa', '') + eq_(s, expected) + + def test_append_vowel_empty(self): + s = Syllable('', '', '') + s = s.append_char('a') + + expected = Syllable('', 'a', '') + eq_(s, expected) + + def test_append_final_consonant(self): + s = Syllable('c', 'a', 'c') + s = s.append_char('c') + + expected = Syllable('c', 'a', 'cc') + eq_(s, expected) \ No newline at end of file From e49113f77af0d906477db688fec8d00eba334f4a Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sat, 26 Apr 2014 13:04:53 +0700 Subject: [PATCH 03/15] Introduce the keep_case decorator --- bogo/test/test_utils.py | 59 +++++++++++++++++++++++++++++++++++++++++ bogo/utils.py | 26 ++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/bogo/test/test_utils.py b/bogo/test/test_utils.py index a96f6b6..ba5d76d 100644 --- a/bogo/test/test_utils.py +++ b/bogo/test/test_utils.py @@ -47,3 +47,62 @@ def test_separate(): eq_(separate('xẻng'), ['x', 'ẻ', 'ng']) eq_(separate('xoáy'), ['x', 'oáy', '']) eq_(separate('quây'), ['qu', 'ây', '']) + + +class TestKeepCase(): + + def test_keep_lower(self): + + @keep_case + def function(string): + return string.upper() + + eq_(function("abc"), "abc") + + def test_keep_title(self): + + @keep_case + def function(string): + return string.upper() + + eq_(function("Abc"), "Abc") + + def test_keep_upper(self): + + @keep_case + def function(string): + return string.title() + + eq_(function("ABC"), "ABC") + + def test_multiple_arguments(self): + + @keep_case + def function(string, arg1, arg2, kwarg1=True): + return "{} {} {} {}".format(string, arg1, arg2, kwarg1) + + result = function("abc", 1, 2, 3) + expected = "abc 1 2 3" + + eq_(result, expected) + + def test_normalize_case(self): + """ + Test that the string argument is always normalized to lower case. + """ + inner = [0] + + @keep_case + def function(string): + inner[0] = string + return string + + function("ABC") + eq_(inner[0], "abc") + + def test_unrecognized_case(self): + @keep_case + def function(string): + return string + + eq_(function("aBcD"), "abcd") diff --git a/bogo/utils.py b/bogo/utils.py index f3b84cb..d70e661 100644 --- a/bogo/utils.py +++ b/bogo/utils.py @@ -129,3 +129,29 @@ def atomic_separate(string, last_chars, last_is_vowel): comps[1] = comps[1][1:] return comps + + +def keep_case(function): + """ + Decorator to ensure that the letter case of the input and + output of a function stays the same. + + This function assumes that the decorated function takes + a string as the first argument and returns a modified + version of it. Also, the string argument will be normalized + to lower case before being passed to the decorated function. + """ + + def inner(string, *args, **kwargs): + restore_case = { + True: str.__str__, # fallback if the string is empty + string.isupper(): str.upper, + string.islower(): str.lower, + string.istitle(): str.title + }[True] + + modified_string = function(string.lower(), *args, **kwargs) + + return restore_case(modified_string) + + return inner From 100e5cbd9740482907272f05c84cf91c19c345e8 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sat, 26 Apr 2014 13:05:09 +0700 Subject: [PATCH 04/15] Refactor accent.py and add more tests --- bogo/accent.py | 86 +++++++++++++++++++++------------------- bogo/test/test_accent.py | 83 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 40 deletions(-) diff --git a/bogo/accent.py b/bogo/accent.py index 832f0ba..f703e5b 100644 --- a/bogo/accent.py +++ b/bogo/accent.py @@ -3,7 +3,7 @@ # This file is part of ibus-bogo project. # # Copyright (C) 2012 Long T. Dam -# Copyright (C) 2012-2013 Trung Ngo +# Copyright (C) 2012-2014 Trung Ngo # Copyright (C) 2013 Duong H. Nguyen # # ibus-bogo is free software: you can redistribute it and/or modify @@ -21,7 +21,7 @@ # """ -Utility functions to deal with accents (should have been called tones), +Utility functions to deal with accents (also called tones), which are diacritical markings that changes the pitch of a character. E.g. the acute accent in á. """ @@ -31,9 +31,11 @@ from __future__ import unicode_literals from bogo import utils +from bogo.syllable import Syllable class Accent: + MAX_VALUE = 6 GRAVE = 5 ACUTE = 4 HOOK = 3 @@ -62,61 +64,65 @@ def get_accent_string(string): return accents[-1] if accents else Accent.NONE -def add_accent(components, accent): +def add_accent(syllable, accent): """ - Add accent to the given components. The parameter components is - the result of function separate() + Add accent to the given syllable. """ - vowel = components[1] - last_consonant = components[2] + vowel = syllable.vowel + + if not vowel: + return syllable + if accent == Accent.NONE: vowel = remove_accent_string(vowel) - return [components[0], vowel, last_consonant] + return Syllable(syllable.initial_consonant, vowel, syllable.final_consonant) - if vowel == "": - return components - #raw_string is a list, not a str object - raw_string = remove_accent_string(vowel).lower() - new_vowel = "" + vowel_wo_accent = remove_accent_string(vowel).lower() + new_vowel = '' + # Highest priority for ê and ơ - index = max(raw_string.find("ê"), raw_string.find("ơ")) - if index != -1: - new_vowel = vowel[:index] + add_accent_char(vowel[index], accent) + vowel[index+1:] - elif len(vowel) == 1 or (len(vowel) == 2 and last_consonant == ""): - new_vowel = add_accent_char(vowel[0], accent) + vowel[1:] + index = max(vowel_wo_accent.find("ê"), vowel_wo_accent.find("ơ")) + found_e_hat_or_o_horn = index != -1 + + if found_e_hat_or_o_horn: + # Add accent mark to the found ê or ơ + new_vowel = \ + vowel[:index] + \ + add_accent_char(vowel[index], accent) + \ + vowel[index + 1:] + elif len(vowel) == 1 or (len(vowel) == 2 and not syllable.final_consonant): + # cá + # cháo + first_vowel_char = vowel[0] + first_vowel_char_with_accent = add_accent_char(first_vowel_char, accent) + new_vowel = first_vowel_char_with_accent + vowel[1:] else: - new_vowel = vowel[:1] + add_accent_char(vowel[1], accent) + vowel[2:] - return [components[0], new_vowel, components[2]] + # biến + # khuỷu + second_vowel_char = vowel[1] + second_vowel_char_with_accent = add_accent_char(second_vowel_char, accent) + new_vowel = vowel[:1] + second_vowel_char_with_accent + vowel[2:] + return Syllable(syllable.initial_consonant, new_vowel, syllable.final_consonant) + +@utils.keep_case def add_accent_char(char, accent): """ - Add accent to a single char. Parameter accent is member of class - Accent + Add accent to a single char. + + Args: + accent: an Accent enum value """ - if char == "": - return "" - case = char.isupper() - char = char.lower() + if not (char and accent in range(0, Accent.MAX_VALUE + 1)): + return char + index = utils.VOWELS.find(char) if (index != -1): index = index - index % 6 + 5 char = utils.VOWELS[index - accent] - return utils.change_case(char, case) - -def add_accent_at(string, accent, index): - """ - Add mark to the index-th character of the given string. Return - the new string after applying change. - (unused) - """ - if index == -1: - return string - # Python can handle the case which index is out of range of given string - return string[:index] + \ - accent.accent.add_accent_char(string[index], accent) + \ - string[index+1:] + return char def remove_accent_char(char): diff --git a/bogo/test/test_accent.py b/bogo/test/test_accent.py index 4a574b3..f814a2c 100644 --- a/bogo/test/test_accent.py +++ b/bogo/test/test_accent.py @@ -1,3 +1,86 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +from nose.tools import eq_ +from bogo.accent import add_accent, add_accent_char, Accent +from bogo.syllable import Syllable + + +class TestAddAccentChar(): + + def test_empty_char(self): + result = add_accent_char('', Accent.GRAVE) + expected = '' + eq_(result, expected) + + def test_out_of_range_accent(self): + result = add_accent_char('a', 293432) + expected = 'a' + eq_(result, expected) + + def test_normal_accent(self): + result = add_accent_char('a', Accent.ACUTE) + expected = 'á' + eq_(result, expected) + + def test_upper_case(self): + eq_(add_accent_char('A', Accent.ACUTE), 'Á') + + +class TestAddAccent(): + def test_remove_accent(self): + s = Syllable('c', 'á', 'c') + + result = add_accent(s, Accent.NONE) + expected = Syllable('c', 'a', 'c') + + eq_(result, expected) + + def test_e_hat(self): + s = Syllable('ch', 'uyê', 'n') + + result = add_accent(s, Accent.HOOK) + expected = Syllable('ch', 'uyể', 'n') + + eq_(result, expected) + + def test_o_horn(self): + s = Syllable('ch', 'ươ', 'ng') + + result = add_accent(s, Accent.HOOK) + expected = Syllable('ch', 'ưở', 'ng') + + eq_(result, expected) + + def test_double_vowel_no_final_consonant(self): + s = Syllable('c', 'ua', '') + + result = add_accent(s, Accent.HOOK) + expected = Syllable('c', 'ủa', '') + + eq_(result, expected) + + def test_double_vowel_with_final_consonant(self): + s = Syllable('c', 'uô', 'ng') + + result = add_accent(s, Accent.GRAVE) + expected = Syllable('c', 'uồ', 'ng') + + eq_(result, expected) + + def test_single_vowel(self): + s = Syllable('c', 'a', '') + + result = add_accent(s, Accent.ACUTE) + expected = Syllable('c', 'á', '') + + eq_(result, expected) + + s = Syllable('c', 'a', 'n') + + result = add_accent(s, Accent.ACUTE) + expected = Syllable('c', 'á', 'n') + + eq_(result, expected) + + \ No newline at end of file From 32139345bc9ff36a54d077d429ce738ac922d32e Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 02:11:03 +0700 Subject: [PATCH 05/15] First phase of refactoring to change all comp/components to Syllable --- bogo/core.py | 124 ++++++++++++++++++++--------------------------- bogo/mark.py | 44 ++++++++++------- bogo/syllable.py | 2 + 3 files changed, 81 insertions(+), 89 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index 697e3ba..a70d153 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -26,8 +26,8 @@ from __future__ import unicode_literals from bogo.validation import is_valid_combination +from bogo.syllable import Syllable from bogo import utils, accent, mark -import logging import sys import string @@ -204,37 +204,30 @@ def process_key(string, key, effect strings. Although you should try to avoid this if you are defining a custom input method rule. """ - # TODO Figure out a way to remove the `string` argument. Perhaps only the - # key sequence is needed? - def default_return(): - return string + key, fallback_sequence + key if rules is None: rules = get_telex_definition() - comps = utils.separate(string) - - # if not _is_processable(comps): - # return default_return() + syl = Syllable.new_from_string(string) # Find all possible transformations this keypress can generate trans_list = _get_transformation_list( key, rules, fallback_sequence) # Then apply them one by one - new_comps = list(comps) + new_syl = syl for trans in trans_list: - new_comps = _transform(new_comps, trans) + new_syl = _transform(new_syl, trans) - if new_comps == comps: - tmp = list(new_comps) + if new_syl == syl: + tmp = new_syl # If none of the transformations (if any) work # then this keystroke is probably an undo key. - if _can_undo(new_comps, trans_list): + if _can_undo(new_syl, trans_list): # The prefix "_" means undo. for trans in map(lambda x: "_" + x, trans_list): - new_comps = _transform(new_comps, trans) + new_syl = _transform(new_syl, trans) # Undoing the w key with the TELEX input method with the # w:<ư extension requires some care. @@ -252,7 +245,7 @@ def is_telex_like(): return '<ư' in rules["w"] def undone_vowel_ends_with_u(): - return new_comps[1] and new_comps[1][-1].lower() == "u" + return new_syl[1] and new_syl[1][-1].lower() == "u" def not_first_key_press(): return len(fallback_sequence) >= 1 @@ -269,21 +262,21 @@ def user_didnt_type_uww(): undone_vowel_ends_with_u() and \ user_typed_ww() and \ user_didnt_type_uww(): - # The vowel part of new_comps is supposed to end with + # The vowel part of new_syl is supposed to end with # u now. That u should be removed. - new_comps[1] = new_comps[1][:-1] + new_syl[1] = new_syl[1][:-1] - if tmp == new_comps: + if tmp == new_syl: fallback_sequence += key - new_comps = utils.append_comps(new_comps, key) + new_syl = utils.append_comps(new_syl, key) else: fallback_sequence += key if skip_non_vietnamese is True and key.isalpha() and \ - not is_valid_combination(new_comps, final_form=False): + not is_valid_combination(new_syl, final_form=False): result = fallback_sequence, fallback_sequence else: - result = utils.join(new_comps), fallback_sequence + result = new_syl.as_string(), fallback_sequence return result @@ -364,25 +357,22 @@ def _get_action(trans): return accent_action[trans[0]] -def _transform(comps, trans): +def _transform(syllable, trans): """ Transform the given string with transform type trans """ - logging.debug("== In _transform(%s, %s) ==", comps, trans) - components = list(comps) action, parameter = _get_action(trans) if action == _Action.ADD_MARK and \ - components[2] == "" and \ - mark.strip(components[1]).lower() in ['oe', 'oa'] and trans == "o^": + syllable.final_consonant == "" and \ + mark.strip(syllable.vowel).lower() in ['oe', 'oa'] and \ + trans == "o^": action, parameter = _Action.ADD_CHAR, trans[0] if action == _Action.ADD_ACCENT: - logging.debug("add_accent(%s, %s)", components, parameter) - components = accent.add_accent(components, parameter) - elif action == _Action.ADD_MARK and mark.is_valid_mark(components, trans): - logging.debug("add_mark(%s, %s)", components, parameter) - components = mark.add_mark(components, parameter) + syllable = accent.add_accent(syllable, parameter) + elif action == _Action.ADD_MARK and mark.is_valid_mark(syllable, trans): + syllable = mark.add_mark(syllable, parameter) # Handle uơ in "huơ", "thuở", "quở" # If the current word has no last consonant and the first consonant @@ -392,48 +382,39 @@ def _transform(comps, trans): # # NOTE: In the dictionary, these are the only words having this strange # vowel so we don't need to worry about other cases. - if accent.remove_accent_string(components[1]).lower() == "ươ" and \ - not components[2] and components[0].lower() in ["", "h", "th", "kh"]: + if accent.remove_accent_string(syllable.vowel).lower() == "ươ" and \ + not syllable.final_consonant and \ + syllable.initial_consonant.lower() in ["", "h", "th", "kh"]: # Backup accents - ac = accent.get_accent_string(components[1]) - components[1] = ("u", "U")[components[1][0].isupper()] + components[1][1] - components = accent.add_accent(components, ac) + akzent = accent.get_accent_string(syllable.vowel) + syllable = Syllable( + syllable.initial_consonant, + mark.strip(syllable.vowel[0]) + syllable.vowel[1]) + syllable = accent.add_accent(syllable, akzent) elif action == _Action.ADD_CHAR: - if trans[0] == "<": - if not components[2]: - # Only allow ư, ơ or ươ sitting alone in the middle part - # and ['g', 'i', '']. If we want to type giowf = 'giờ', separate() - # will create ['g', 'i', '']. Therefore we have to allow - # components[1] == 'i'. - if (components[0].lower(), components[1].lower()) == ('g', 'i'): - components[0] += components[1] - components[1] = '' - if not components[1] or \ - (components[1].lower(), trans[1].lower()) == ('ư', 'ơ'): - components[1] += trans[1] - else: - components = utils.append_comps(components, parameter) - if parameter.isalpha() and \ - accent.remove_accent_string(components[1]).lower().startswith("uơ"): - ac = accent.get_accent_string(components[1]) - components[1] = ('ư', 'Ư')[components[1][0].isupper()] + \ - ('ơ', 'Ơ')[components[1][1].isupper()] + components[1][2:] - components = accent.add_accent(components, ac) + syllable = syllable.append_char(parameter) + if parameter.isalpha() and \ + accent.remove_accent_string(syllable.vowel) \ + .lower().startswith("uơ"): + ac = accent.get_accent_string(syllable.vowel) + # components[1] = ('ư', 'Ư')[components[1][0].isupper()] + \ + # ('ơ', 'Ơ')[components[1][1].isupper()] + components[1][2:] + # components = accent.add_accent(components, ac) elif action == _Action.UNDO: - components = _reverse(components, trans[1:]) + syllable = _reverse(syllable, trans[1:]) - if action == _Action.ADD_MARK or (action == _Action.ADD_CHAR and parameter.isalpha()): + if action == _Action.ADD_MARK or \ + (action == _Action.ADD_CHAR and parameter.isalpha()): # If there is any accent, remove and reapply it # because it is likely to be misplaced in previous transformations - ac = accent.get_accent_string(components[1]) + akzent = accent.get_accent_string(syllable.vowel) - if ac != accent.Accent.NONE: - components = accent.add_accent(components, Accent.NONE) - components = accent.add_accent(components, ac) + if akzent != accent.Accent.NONE: + syllable = accent.add_accent(syllable, Accent.NONE) + syllable = accent.add_accent(syllable, ac) - logging.debug("After transform: %s", components) - return components + return syllable def _reverse(components, trans): @@ -468,14 +449,13 @@ def _reverse(components, trans): return comps -def _can_undo(comps, trans_list): +def _can_undo(syllable, trans_list): """ Return whether a components can be undone with one of the transformation in trans_list. """ - comps = list(comps) - accent_list = list(map(accent.get_accent_char, comps[1])) - mark_list = list(map(mark.get_mark_char, utils.join(comps))) + accent_list = list(map(accent.get_accent_char, syllable.vowel)) + mark_list = list(map(mark.get_mark_char, syllable.as_string())) action_list = list(map(lambda x: _get_action(x), trans_list)) def atomic_check(action): @@ -484,8 +464,8 @@ def atomic_check(action): in `comps`. """ return (action[0] == _Action.ADD_ACCENT and action[1] in accent_list) \ - or (action[0] == _Action.ADD_MARK and action[1] in mark_list) \ - or (action[0] == _Action.ADD_CHAR and action[1] == \ - accent.remove_accent_char(comps[1][-1])) # ơ, ư + or (action[0] == _Action.ADD_MARK and action[1] in mark_list) \ + or (action[0] == _Action.ADD_CHAR and action[1] == + accent.remove_accent_char(syllable.vowel[-1])) # ơ, ư return any(map(atomic_check, action_list)) diff --git a/bogo/mark.py b/bogo/mark.py index bdc63f7..758754d 100644 --- a/bogo/mark.py +++ b/bogo/mark.py @@ -29,6 +29,7 @@ from __future__ import unicode_literals from bogo import accent, utils +from bogo.syllable import Syllable Accent = accent.Accent @@ -66,35 +67,44 @@ def get_mark_char(char): # TODO: Monstrous code. Needs refactoring. -def add_mark(components, mark): - comp = list(components) - if mark == Mark.BAR and comp[0] and comp[0][-1].lower() in FAMILY_D: - comp[0] = add_mark_at(comp[0], len(comp[0])-1, Mark.BAR) +def add_mark(syllable, mark): + new_initial_consonant, new_vowel, new_final_consonant = syllable + + if mark == Mark.BAR and \ + syllable.initial_consonant and \ + syllable.initial_consonant[-1].lower() in FAMILY_D: + new_initial_consonant = add_mark_at( + syllable.initial_consonant, + len(syllable.initial_consonant) - 1, + Mark.BAR) else: - #remove all marks and accents in vowel part - raw_vowel = accent.add_accent(comp, Accent.NONE)[1].lower() - raw_vowel = utils.join([add_mark_char(c, Mark.NONE) for c in raw_vowel]) + raw_vowel = strip(syllable.vowel) if mark == Mark.HAT: pos = max(raw_vowel.find("a"), raw_vowel.find("o"), raw_vowel.find("e")) - comp[1] = add_mark_at(comp[1], pos, Mark.HAT) + new_vowel = add_mark_at(syllable.vowel, pos, Mark.HAT) elif mark == Mark.BREVE: if raw_vowel != "ua": - comp[1] = add_mark_at(comp[1], raw_vowel.find("a"), Mark.BREVE) + new_vowel = add_mark_at( + syllable.vowel, raw_vowel.find("a"), Mark.BREVE) elif mark == Mark.HORN: if raw_vowel in ("uo", "uoi", "uou"): - comp[1] = utils.join([add_mark_char(c, Mark.HORN) for c in comp[1][:2]]) + comp[1][2:] + new_vowel = "".join( + [add_mark_char(c, Mark.HORN) for c in syllable.vowel[:2]]) \ + + syllable.vowel[2:] elif raw_vowel == "oa": - comp[1] = add_mark_at(comp[1], 1, Mark.HORN) + new_vowel = add_mark_at(syllable.vowel, 1, Mark.HORN) else: pos = max(raw_vowel.find(""), raw_vowel.find("o")) - comp[1] = add_mark_at(comp[1], pos, Mark.HORN) + new_vowel = add_mark_at(syllable.vowel, pos, Mark.HORN) if mark == Mark.NONE: - if not raw_vowel == comp[1].lower(): - comp[1] = raw_vowel - elif comp[0] and comp[0][-1] == "đ": - comp[0] = comp[0][:-1] + "d" - return comp + if not raw_vowel == syllable.vowel.lower(): + new_vowel = raw_vowel + elif syllable.initial_consonant and \ + syllable.initial_consonant[-1] == "đ": + new_initial_consonant = syllable.initial_consonant[:-1] + "d" + + return Syllable(new_initial_consonant, new_vowel, new_final_consonant) def add_mark_at(string, index, mark): diff --git a/bogo/syllable.py b/bogo/syllable.py index b4a152f..8abae5b 100644 --- a/bogo/syllable.py +++ b/bogo/syllable.py @@ -46,6 +46,8 @@ def atomic_separate(string, last_chars, last_is_vowel): return Syllable(first_consonant, vowel, last_consonant) + def as_string(self): + return self.initial_consonant + self.vowel + self.final_consonant def append_char(self, char): """ From a7a618afc9c50f8f5842937fda9eb44bcefb8e33 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 02:27:38 +0700 Subject: [PATCH 06/15] Delete dead code --- bogo/core.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index a70d153..b32457e 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -290,10 +290,6 @@ def _get_transformation_list(key, im, fallback_sequence): if entered key is not in im, return "+key", meaning appending the entered key to current text """ - # if key in im: - # lkey = key - # else: - # lkey = key.lower() lkey = key.lower() if lkey in im: From 1dc2656400d86c30f014c48779bd6512ed371f27 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 02:27:50 +0700 Subject: [PATCH 07/15] Rename --- bogo/core.py | 8 ++++---- bogo/test/test_engine.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index b32457e..70ef8c8 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -320,7 +320,7 @@ def _get_transformation_list(key, im, fallback_sequence): return ['+' + key] -def _get_action(trans): +def _parse_transformation(trans): """ Return the action inferred from the transformation `trans`. and the parameter going with this action @@ -358,7 +358,7 @@ def _transform(syllable, trans): Transform the given string with transform type trans """ - action, parameter = _get_action(trans) + action, parameter = _parse_transformation(trans) if action == _Action.ADD_MARK and \ syllable.final_consonant == "" and \ mark.strip(syllable.vowel).lower() in ['oe', 'oa'] and \ @@ -420,7 +420,7 @@ def _reverse(components, trans): string. """ - action, parameter = _get_action(trans) + action, parameter = _parse_transformation(trans) comps = list(components) string = utils.join(comps) @@ -452,7 +452,7 @@ def _can_undo(syllable, trans_list): """ accent_list = list(map(accent.get_accent_char, syllable.vowel)) mark_list = list(map(mark.get_mark_char, syllable.as_string())) - action_list = list(map(lambda x: _get_action(x), trans_list)) + action_list = list(map(lambda x: _parse_transformation(x), trans_list)) def atomic_check(action): """ diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py index 35b871f..95ae83c 100644 --- a/bogo/test/test_engine.py +++ b/bogo/test/test_engine.py @@ -6,7 +6,7 @@ from functools import partial import codecs -from bogo.core import _Action, _get_action, process_sequence +from bogo.core import _Action, _parse_transformation, process_sequence from bogo.mark import Mark import os @@ -18,12 +18,12 @@ class TestHelpers(): def test_transform(self): pass - def test__get_action(self): + def test__parse_transformation(self): # Add mark - eq_(_get_action('a^'), (_Action.ADD_MARK, Mark.HAT)) - eq_(_get_action('a+'), (_Action.ADD_MARK, Mark.BREVE)) - eq_(_get_action('o*'), (_Action.ADD_MARK, Mark.HORN)) - eq_(_get_action('d-'), (_Action.ADD_MARK, Mark.BAR)) + eq_(_parse_transformation('a^'), (_Action.ADD_MARK, Mark.HAT)) + eq_(_parse_transformation('a+'), (_Action.ADD_MARK, Mark.BREVE)) + eq_(_parse_transformation('o*'), (_Action.ADD_MARK, Mark.HORN)) + eq_(_parse_transformation('d-'), (_Action.ADD_MARK, Mark.BAR)) def test_get_transformation_list(self): pass From b9558894855d162d8504648200ebe325b308b462 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 02:42:39 +0700 Subject: [PATCH 08/15] Fix marks --- bogo/mark.py | 2 +- bogo/test/test_mark.py | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/bogo/mark.py b/bogo/mark.py index 758754d..1da2425 100644 --- a/bogo/mark.py +++ b/bogo/mark.py @@ -78,7 +78,7 @@ def add_mark(syllable, mark): len(syllable.initial_consonant) - 1, Mark.BAR) else: - raw_vowel = strip(syllable.vowel) + raw_vowel = strip(syllable.vowel).lower() if mark == Mark.HAT: pos = max(raw_vowel.find("a"), raw_vowel.find("o"), raw_vowel.find("e")) diff --git a/bogo/test/test_mark.py b/bogo/test/test_mark.py index d98dca1..6f403da 100644 --- a/bogo/test/test_mark.py +++ b/bogo/test/test_mark.py @@ -168,10 +168,17 @@ def test_add_mark_at(self): eq_(add_mark_at('e', 0, Mark.HAT), 'ê') def test_add_mark(self): - eq_(add_mark(['d', 'uo', 'ng'], Mark.BAR), ['đ', 'uo', 'ng']) - eq_(add_mark(['d', 'uo', 'ng'], Mark.HORN), ['d', 'ươ', 'ng']) - eq_(add_mark(['d', 'uô', 'ng'], Mark.HORN), ['d', 'ươ', 'ng']) - eq_(add_mark(['d', 'Á', ''], Mark.HAT), ['d', 'Ấ', '']) - eq_(add_mark(['d', '', ''], Mark.BAR), ['đ', '', '']) - eq_(add_mark(['D', 'uo', 'ng'], Mark.BAR), ['Đ', 'uo', 'ng']) - eq_(add_mark(['d', 'e', ''], Mark.HAT), ['d', 'ê', '']) + eq_(add_mark(Syllable('d', 'uo', 'ng'), Mark.BAR), + Syllable('đ', 'uo', 'ng')) + eq_(add_mark(Syllable('d', 'uo', 'ng'), Mark.HORN), + Syllable('d', 'ươ', 'ng')) + eq_(add_mark(Syllable('d', 'uô', 'ng'), Mark.HORN), + Syllable('d', 'ươ', 'ng')) + eq_(add_mark(Syllable('d', 'Á', ''), Mark.HAT), + Syllable('d', 'Ấ', '')) + eq_(add_mark(Syllable('d', '', ''), Mark.BAR), + Syllable('đ', '', '')) + eq_(add_mark(Syllable('D', 'uo', 'ng'), Mark.BAR), + Syllable('Đ', 'uo', 'ng')) + eq_(add_mark(Syllable('d', 'e', ''), Mark.HAT), + Syllable('d', 'ê', '')) From e916b16b3665bbd6a922d07f54a1af8044fd3aa8 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 02:43:01 +0700 Subject: [PATCH 09/15] Minor fix in core --- bogo/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bogo/core.py b/bogo/core.py index 70ef8c8..eff2c40 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -268,7 +268,7 @@ def user_didnt_type_uww(): if tmp == new_syl: fallback_sequence += key - new_syl = utils.append_comps(new_syl, key) + new_syl = new_syl.append_char(key) else: fallback_sequence += key From 8f75b990c50780c7ca6a7e7b14421b168bc9fe1e Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 11:48:43 +0700 Subject: [PATCH 10/15] Port is_valid_mark() to syllable --- bogo/mark.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bogo/mark.py b/bogo/mark.py index 1da2425..88ef556 100644 --- a/bogo/mark.py +++ b/bogo/mark.py @@ -162,19 +162,18 @@ def add_mark_char(char, mark): return utils.change_case(new_char, case) -def is_valid_mark(comps, mark_trans): +def is_valid_mark(syllable, mark_trans): """ Check whether the mark given by mark_trans is valid to add to the components """ if mark_trans == "*_": return True - components = list(comps) - if mark_trans[0] == 'd' and components[0] \ - and components[0][-1].lower() in ("d", "đ"): + if mark_trans[0] == 'd' and syllable.initial_consonant \ + and syllable.initial_consonant[-1].lower() in ("d", "đ"): return True - elif components[1] != "" and \ - strip(components[1]).lower().find(mark_trans[0]) != -1: + elif syllable.vowel and \ + strip(syllable.vowel).lower().find(mark_trans[0]) != -1: return True else: return False From 6dd159a5f7b035d5718451a142aa32e2aabb9d2c Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 17:54:40 +0700 Subject: [PATCH 11/15] Rewrite core.py --- bogo/__init__.py | 1 - bogo/core.py | 405 ++++++++++----------------------------- bogo/test/test_core.py | 76 ++++++++ bogo/test/test_engine.py | 191 ------------------ 4 files changed, 179 insertions(+), 494 deletions(-) create mode 100644 bogo/test/test_core.py delete mode 100644 bogo/test/test_engine.py diff --git a/bogo/__init__.py b/bogo/__init__.py index d507d7c..814a955 100644 --- a/bogo/__init__.py +++ b/bogo/__init__.py @@ -25,7 +25,6 @@ """ from bogo.core import \ - process_key, \ process_sequence, \ get_telex_definition, \ get_vni_definition diff --git a/bogo/core.py b/bogo/core.py index eff2c40..ae8e288 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -55,11 +55,11 @@ def get_telex_definition(w_shorthand=True, brackets_shorthand=True): Returns a dictionary to be passed into process_key(). """ telex = { - "a": "a^", - "o": "o^", - "e": "e^", - "w": ["u*", "o*", "a+"], - "d": "d-", + "a": "^", + "o": "^", + "e": "^", + "w": ["*", "("], + "d": "-", "f": "\\", "s": "/", "r": "?", @@ -127,341 +127,142 @@ def process_sequence(sequence, i.e. process_sequence('con meof.ddieen') should work. """ result = "" - raw = result result_parts = [] if rules is None: rules = get_telex_definition() accepted_chars = _accepted_chars(rules) + rules = Rule(rules) + bg = BoGo(rules) for key in sequence: if key not in accepted_chars: result_parts.append(result) result_parts.append(key) result = "" - raw = "" else: - result, raw = process_key( - string=result, - key=key, - fallback_sequence=raw, - rules=rules, - skip_non_vietnamese=skip_non_vietnamese) + result = bg.add_key(key) result_parts.append(result) return ''.join(result_parts) -def process_key(string, key, - fallback_sequence="", rules=None, - skip_non_vietnamese=True): - """Process a keystroke. +class Transformation: - Args: - string: The previously processed string or "". - key: The keystroke. - fallback_sequence: The previous keystrokes. - rules (optional): A dictionary listing - transformation rules. Defaults to get_telex_definition(). - skip_non_vietnamese (optional): Whether to skip results that - doesn't seem like Vietnamese. Defaults to True. - - Returns a tuple. The first item of which is the processed - Vietnamese string, the second item is the next fallback sequence. - The two items are to be fed back into the next call of process_key() - as `string` and `fallback_sequence`. If `skip_non_vietnamese` is - True and the resulting string doesn't look like Vietnamese, - both items contain the `fallback_sequence`. - - >>> process_key('a', 'a', 'a') - (â, aa) - - Note that when a key is an undo key, it won't get appended to - `fallback_sequence`. - - >>> process_key('â', 'a', 'aa') - (aa, aa) - - `rules` is a dictionary that maps keystrokes to - their effect string. The effects can be one of the following: - - 'a^': a with circumflex (â), only affect an existing 'a family' - 'a+': a with breve (ă), only affect an existing 'a family' - 'e^': e with circumflex (ê), only affect an existing 'e family' - 'o^': o with circumflex (ô), only affect an existing 'o family' - 'o*': o with horn (ơ), only affect an existing 'o family' - 'd-': d with bar (đ), only affect an existing 'd' - '/': acute (sắc), affect an existing vowel - '\': grave (huyền), affect an existing vowel - '?': hook (hỏi), affect an existing vowel - '~': tilde (ngã), affect an existing vowel - '.': dot (nặng), affect an existing vowel - '<ư': append ư - '<ơ': append ơ - - A keystroke entry can have multiple effects, in which case the - dictionary entry's value should be a list of the possible - effect strings. Although you should try to avoid this if - you are defining a custom input method rule. - """ + def __init__(self, key): + self.key = key - if rules is None: - rules = get_telex_definition() + def perform(self, syllable): + raise NotImplementedError - syl = Syllable.new_from_string(string) - - # Find all possible transformations this keypress can generate - trans_list = _get_transformation_list( - key, rules, fallback_sequence) - - # Then apply them one by one - new_syl = syl - for trans in trans_list: - new_syl = _transform(new_syl, trans) - - if new_syl == syl: - tmp = new_syl - - # If none of the transformations (if any) work - # then this keystroke is probably an undo key. - if _can_undo(new_syl, trans_list): - # The prefix "_" means undo. - for trans in map(lambda x: "_" + x, trans_list): - new_syl = _transform(new_syl, trans) - - # Undoing the w key with the TELEX input method with the - # w:<ư extension requires some care. - # - # The input (ư, w) should be undone as w - # on the other hand, (ư, uw) should return uw. - # - # _transform() is not aware of the 2 ways to generate - # ư in TELEX and always think ư was created by uw. - # Therefore, after calling _transform() to undo ư, - # we always get ['', 'u', '']. - # - # So we have to clean it up a bit. - def is_telex_like(): - return '<ư' in rules["w"] - - def undone_vowel_ends_with_u(): - return new_syl[1] and new_syl[1][-1].lower() == "u" - - def not_first_key_press(): - return len(fallback_sequence) >= 1 - - def user_typed_ww(): - return (fallback_sequence[-1:]+key).lower() == "ww" - - def user_didnt_type_uww(): - return not (len(fallback_sequence) >= 2 and - fallback_sequence[-2].lower() == "u") - - if is_telex_like() and \ - not_first_key_press() and \ - undone_vowel_ends_with_u() and \ - user_typed_ww() and \ - user_didnt_type_uww(): - # The vowel part of new_syl is supposed to end with - # u now. That u should be removed. - new_syl[1] = new_syl[1][:-1] - - if tmp == new_syl: - fallback_sequence += key - new_syl = new_syl.append_char(key) - else: - fallback_sequence += key - if skip_non_vietnamese is True and key.isalpha() and \ - not is_valid_combination(new_syl, final_form=False): - result = fallback_sequence, fallback_sequence - else: - result = new_syl.as_string(), fallback_sequence +class AddCharTransformation(Transformation): + def __init__(self, key, char): + super(AddCharTransformation, self).__init__(key) + self.char = char - return result + def perform(self, syllable): + return syllable.append_char(self.char) -def _get_transformation_list(key, im, fallback_sequence): - """ - Return the list of transformations inferred from the entered key. The - map between transform types and keys is given by module - bogo_config (if exists) or by variable simple_telex_im +class AddToneMarkTransformation(Transformation): + def __init__(self, key, tone): + super(AddToneMarkTransformation, self).__init__(key) + self.tone = tone - if entered key is not in im, return "+key", meaning appending - the entered key to current text - """ - lkey = key.lower() + def perform(self, syllable): + return accent.add_accent(syllable, self.tone) - if lkey in im: - if isinstance(im[lkey], list): - trans_list = im[lkey] - else: - trans_list = [im[lkey]] - - for i, trans in enumerate(trans_list): - if trans[0] == '<' and key.isalpha(): - trans_list[i] = trans[0] + \ - utils.change_case(trans[1], int(key.isupper())) - - if trans_list == ['_']: - if len(fallback_sequence) >= 2: - # TODO Use takewhile()/dropwhile() to process the last IM keypress - # instead of assuming it's the last key in fallback_sequence. - t = list(map(lambda x: "_" + x, - _get_transformation_list(fallback_sequence[-2], im, - fallback_sequence[:-1]))) - # print(t) - trans_list = t - # else: - # trans_list = ['+' + key] - - return trans_list - else: - return ['+' + key] +class AddCharMarkTransformation(Transformation): + def __init__(self, key, mark): + super(AddCharMarkTransformation, self).__init__(key) + self.mark = mark -def _parse_transformation(trans): - """ - Return the action inferred from the transformation `trans`. - and the parameter going with this action - An _Action.ADD_MARK goes with a Mark - while an _Action.ADD_ACCENT goes with an Accent - """ - # TODO: VIQR-like convention + def perform(self, syllable): + return mark.add_mark(syllable, self.mark) + + +class Rule: mark_action = { - '^': (_Action.ADD_MARK, Mark.HAT), - '+': (_Action.ADD_MARK, Mark.BREVE), - '*': (_Action.ADD_MARK, Mark.HORN), - '-': (_Action.ADD_MARK, Mark.BAR), + '^': Mark.HAT, + '(': Mark.BREVE, + '*': Mark.HORN, + '-': Mark.BAR, } accent_action = { - '\\': (_Action.ADD_ACCENT, Accent.GRAVE), - '/': (_Action.ADD_ACCENT, Accent.ACUTE), - '?': (_Action.ADD_ACCENT, Accent.HOOK), - '~': (_Action.ADD_ACCENT, Accent.TIDLE), - '.': (_Action.ADD_ACCENT, Accent.DOT), + '\\': Accent.GRAVE, + '/': Accent.ACUTE, + '?': Accent.HOOK, + '~': Accent.TIDLE, + '.': Accent.DOT, } - if trans[0] in ('<', '+'): - return _Action.ADD_CHAR, trans[1] - if trans[0] == "_": - return _Action.UNDO, trans[1:] - if len(trans) == 2: - return mark_action[trans[1]] - else: - return accent_action[trans[0]] + def __init__(self, rule_dict): + self.rule_dict = rule_dict + + @staticmethod + def parse_rule_action(rule_action, key): + # Each typing rule consists of 2 parts: a predicate and + # an action associated with that predicate. + # e.g.: In the rule 'r -> ?', 'r' is the key, the predicate, + # and ? is the action (add a HOOK tone mark to the + # suitable vowel). + + if rule_action[0] == '<': + # <ư + trans = AddCharTransformation(key, rule_action[1]) + # elif rule_action[0] == "_": + # # _a^ + # trans = Transformation(_Action.UNDO, rule_action[1:]) + elif rule_action in Rule.mark_action: + # ^ + trans = AddCharMarkTransformation( + key, Rule.mark_action[rule_action]) + elif rule_action in Rule.accent_action: + # ? + trans = AddToneMarkTransformation( + key, Rule.accent_action[rule_action]) + else: + # TODO ? + raise ValueError + if type(trans) is AddCharTransformation: + if key.isupper(): + trans.key = trans.key.toupper() -def _transform(syllable, trans): - """ - Transform the given string with transform type trans - """ + return trans - action, parameter = _parse_transformation(trans) - if action == _Action.ADD_MARK and \ - syllable.final_consonant == "" and \ - mark.strip(syllable.vowel).lower() in ['oe', 'oa'] and \ - trans == "o^": - action, parameter = _Action.ADD_CHAR, trans[0] - - if action == _Action.ADD_ACCENT: - syllable = accent.add_accent(syllable, parameter) - elif action == _Action.ADD_MARK and mark.is_valid_mark(syllable, trans): - syllable = mark.add_mark(syllable, parameter) - - # Handle uơ in "huơ", "thuở", "quở" - # If the current word has no last consonant and the first consonant - # is one of "h", "th" and the vowel is "ươ" then change the vowel into - # "uơ", keeping case and accent. If an alphabet character is then added - # into the word then change back to "ươ". - # - # NOTE: In the dictionary, these are the only words having this strange - # vowel so we don't need to worry about other cases. - if accent.remove_accent_string(syllable.vowel).lower() == "ươ" and \ - not syllable.final_consonant and \ - syllable.initial_consonant.lower() in ["", "h", "th", "kh"]: - # Backup accents - akzent = accent.get_accent_string(syllable.vowel) - syllable = Syllable( - syllable.initial_consonant, - mark.strip(syllable.vowel[0]) + syllable.vowel[1]) - syllable = accent.add_accent(syllable, akzent) - - elif action == _Action.ADD_CHAR: - syllable = syllable.append_char(parameter) - if parameter.isalpha() and \ - accent.remove_accent_string(syllable.vowel) \ - .lower().startswith("uơ"): - ac = accent.get_accent_string(syllable.vowel) - # components[1] = ('ư', 'Ư')[components[1][0].isupper()] + \ - # ('ơ', 'Ơ')[components[1][1].isupper()] + components[1][2:] - # components = accent.add_accent(components, ac) - elif action == _Action.UNDO: - syllable = _reverse(syllable, trans[1:]) - - if action == _Action.ADD_MARK or \ - (action == _Action.ADD_CHAR and parameter.isalpha()): - # If there is any accent, remove and reapply it - # because it is likely to be misplaced in previous transformations - akzent = accent.get_accent_string(syllable.vowel) - - if akzent != accent.Accent.NONE: - syllable = accent.add_accent(syllable, Accent.NONE) - syllable = accent.add_accent(syllable, ac) - - return syllable - - -def _reverse(components, trans): - """ - Reverse the effect of transformation 'trans' on 'components' - If the transformation does not affect the components, return the original - string. - """ + def transformations_from_key(self, key): + if key in self.rule_dict: + return [self.parse_rule_action(rule_action, key) + for rule_action in self.rule_dict[key]] + else: + return [AddCharTransformation(key, key)] - action, parameter = _parse_transformation(trans) - comps = list(components) - string = utils.join(comps) - if action == _Action.ADD_CHAR and string[-1].lower() == parameter.lower(): - if comps[2]: - i = 2 - elif comps[1]: - i = 1 - else: - i = 0 - comps[i] = comps[i][:-1] - elif action == _Action.ADD_ACCENT: - comps = accent.add_accent(comps, Accent.NONE) - elif action == _Action.ADD_MARK: - if parameter == Mark.BAR: - comps[0] = comps[0][:-1] + \ - mark.add_mark_char(comps[0][-1:], Mark.NONE) - else: - if mark.is_valid_mark(comps, trans): - comps[1] = "".join([mark.add_mark_char(c, Mark.NONE) - for c in comps[1]]) - return comps +class BoGo: + def __init__(self, typing_rule): + self.rule = typing_rule + self.transformations = [] + self.syllable = Syllable('', '', '') + def raw_string(self): + "".join([trans.key for trans in self.transformations]) + + def result(self): + return self.syllable.as_string() + + def best_transformation(self, transformations): + return transformations[0] + + def add_key(self, key): + transformation = self.best_transformation( + self.rule.transformations_from_key(key)) + + self.syllable = transformation.perform(self.syllable) + self.transformations.append(transformation) + + return self.syllable.as_string() -def _can_undo(syllable, trans_list): - """ - Return whether a components can be undone with one of the transformation in - trans_list. - """ - accent_list = list(map(accent.get_accent_char, syllable.vowel)) - mark_list = list(map(mark.get_mark_char, syllable.as_string())) - action_list = list(map(lambda x: _parse_transformation(x), trans_list)) - - def atomic_check(action): - """ - Check if the `action` created one of the marks, accents, or characters - in `comps`. - """ - return (action[0] == _Action.ADD_ACCENT and action[1] in accent_list) \ - or (action[0] == _Action.ADD_MARK and action[1] in mark_list) \ - or (action[0] == _Action.ADD_CHAR and action[1] == - accent.remove_accent_char(syllable.vowel[-1])) # ơ, ư - - return any(map(atomic_check, action_list)) diff --git a/bogo/test/test_core.py b/bogo/test/test_core.py new file mode 100644 index 0000000..aa328a8 --- /dev/null +++ b/bogo/test/test_core.py @@ -0,0 +1,76 @@ +from __future__ import unicode_literals +from nose.tools import eq_ +from bogo import core +from bogo.syllable import Syllable +from bogo import accent +from bogo import mark + + +class TestAddCharTransformation: + + def test_add_simple_char(self): + t = core.AddCharTransformation('a', 'a') + syl = Syllable('', '', '') + + result = t.perform(syl) + + eq_(result, Syllable('', 'a', '')) + + +class TestAddToneMarkTransformation: + + def test_add_simple_tone(self): + trans = core.AddToneMarkTransformation('s', accent.Accent.ACUTE) + syl = Syllable('', 'a', '') + result = trans.perform(syl) + + eq_(result, Syllable('', 'á', '')) + + +class TestAddCharMarkTransformation: + + def test_add_simple_mark(self): + trans = core.AddCharMarkTransformation('a', mark.Mark.HAT) + syl = Syllable('', 'a', '') + result = trans.perform(syl) + + eq_(result, Syllable('', 'â', '')) + + +class TestRule: + + def test_parse_add_char(self): + result = core.Rule.parse_rule_action('<ư', 'w') + + eq_(type(result), core.AddCharTransformation) + eq_(result.char, 'ư') + eq_(result.key, 'w') + + def test_parse_add_tone(self): + result = core.Rule.parse_rule_action('?', 'r') + + eq_(type(result), core.AddToneMarkTransformation) + eq_(result.tone, accent.Accent.HOOK) + eq_(result.key, 'r') + + def test_parse_add_mark(self): + result = core.Rule.parse_rule_action('^', 'a') + + eq_(type(result), core.AddCharMarkTransformation) + eq_(result.mark, mark.Mark.HAT) + eq_(result.key, 'a') + + def test_transformations_from_key_rule_key(self): + rule = core.Rule({'w': ['*', '(']}) + trans_list = rule.transformations_from_key('w') + + eq_(len(trans_list), 2) + eq_(type(trans_list[0]), core.AddCharMarkTransformation) + eq_(type(trans_list[1]), core.AddCharMarkTransformation) + + def test_transformations_from_key_non_rule_key(self): + rule = core.Rule({'w': ['*', '(']}) + trans_list = rule.transformations_from_key('a') + + eq_(len(trans_list), 1) + eq_(type(trans_list[0]), core.AddCharTransformation) diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py deleted file mode 100644 index 95ae83c..0000000 --- a/bogo/test/test_engine.py +++ /dev/null @@ -1,191 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import unicode_literals -from nose.tools import eq_ -from nose.plugins.attrib import attr -from functools import partial -import codecs - -from bogo.core import _Action, _parse_transformation, process_sequence -from bogo.mark import Mark -import os - - -process_key_no_skip = partial(process_sequence, skip_non_vietnamese=False) - - -class TestHelpers(): - def test_transform(self): - pass - - def test__parse_transformation(self): - # Add mark - eq_(_parse_transformation('a^'), (_Action.ADD_MARK, Mark.HAT)) - eq_(_parse_transformation('a+'), (_Action.ADD_MARK, Mark.BREVE)) - eq_(_parse_transformation('o*'), (_Action.ADD_MARK, Mark.HORN)) - eq_(_parse_transformation('d-'), (_Action.ADD_MARK, Mark.BAR)) - - def test_get_transformation_list(self): - pass - - def test_can_undo(self): - pass - - def test_reverse(self): - pass - - -class TestProcessSeq(): - def test_normal_typing(self): - eq_(process_sequence('v'), 'v') - eq_(process_sequence('aw'), 'ă') - eq_(process_sequence('w'), 'ư') - eq_(process_sequence('ow'), 'ơ') - eq_(process_sequence('oo'), 'ô') - eq_(process_sequence('Oo'), 'Ô') - eq_(process_sequence('dd'), 'đ') - eq_(process_sequence('muaf'), 'mùa') - eq_(process_sequence('Doongd'), 'Đông') - eq_(process_sequence('gif'), 'gì') - eq_(process_sequence('loAnj'), 'loẠn') - eq_(process_sequence('muongw'), 'mương') - eq_(process_sequence('qur'), 'qur') - eq_(process_sequence('Tosan'), 'Toán') - eq_(process_sequence('tusnw'), 'tứn') - eq_(process_sequence('dee'), 'dê') - eq_(process_sequence('mowis'), 'mới') - eq_(process_sequence('uwa'), 'ưa') - eq_(process_sequence('uwo'), 'ưo') - eq_(process_sequence('ddx'), 'đx') - eq_(process_sequence('hoacw'), 'hoăc') - eq_(process_sequence('cuooi'), 'cuôi') - - eq_(process_sequence('tooi'), 'tôi') - eq_(process_sequence('chuyeenr'), 'chuyển') - eq_(process_sequence('ddoonjg'), 'động') - eq_(process_sequence('nheechs'), 'nhếch') - - # uơ related - eq_(process_sequence('quowr'), 'quở') - eq_(process_sequence('huow'), 'huơ') - eq_(process_sequence('thuowr'), 'thuở') - eq_(process_sequence('QUOWR'), 'QUỞ') - eq_(process_sequence('HUOW'), 'HUƠ') - eq_(process_sequence('THUOWR'), 'THUỞ') - - # English words - eq_(process_key_no_skip('case'), 'cáe') - eq_(process_key_no_skip('reset'), 'rết') - - @attr('slow') - def test_with_dictionary(self): - def atomic(word, sequence): - eq_(word, process_sequence(sequence)) - - path = os.path.join(os.path.dirname(__file__), 'DauCu.sequences') - with codecs.open(path, "r", "utf-8") as tests: - for test in tests.read().splitlines(): - sequence, word = test.rstrip().split(":") - yield atomic, word, sequence - - def test_bugs_related(self): - # naỳ. - eq_(process_sequence('nayf.'), 'này.') - - # nguời - eq_(process_sequence('nguowif'), 'người') - eq_(process_sequence('nguwowif'), 'người') - - # thươ. - eq_(process_sequence("thuowr."), "thuở.") - - eq_(process_sequence("[["), "[") - eq_(process_sequence("[["), "[") - - # BUG #77 - eq_(process_sequence("ddiemer"), "điểm") - - # BUG #78 - eq_(process_sequence("tuoufw"), "tườu") - - # BUG #79 - eq_(process_sequence("huoswc"), "hước") - - # BUG #81 - eq_(process_sequence("khoefo"), "khoèo") - - # BUG #82 - eq_(process_sequence("uorw"), "uở") - - def test_bug_93(self): - eq_(process_sequence("{{"), "{") - eq_(process_sequence("}}"), "}") - - def test_free_key_position(self): - eq_(process_sequence('toios'), 'tối') - eq_(process_sequence('toois'), 'tối') - eq_(process_sequence('toosi'), 'tối') - - eq_(process_sequence('tuyenre'), 'tuyển') - eq_(process_sequence('tuyener'), 'tuyển') - eq_(process_sequence('tuyeren'), 'tuyển') - eq_(process_sequence('tuyerne'), 'tuyển') - eq_(process_sequence('tuyeern'), 'tuyển') - eq_(process_sequence('tuyeenr'), 'tuyển') - - eq_(process_sequence('tuwrowng'), 'tưởng') - - def test_undo(self): - eq_(process_sequence('aaa'), 'aa') - eq_(process_sequence('aww'), 'aw') - eq_(process_sequence('ass'), 'as') - eq_(process_sequence('aff'), 'af') - eq_(process_sequence('arr'), 'ar') - eq_(process_sequence('axx'), 'ax') - eq_(process_sequence('ajj'), 'aj') - eq_(process_sequence('uww'), 'uw') - eq_(process_sequence('oww'), 'ow') - - eq_(process_sequence('huww'), 'huw') - eq_(process_sequence('hww'), 'hw') - eq_(process_sequence('ww'), 'w') - eq_(process_sequence('uww'), 'uw') - - eq_(process_sequence('DDd'), 'Dd') - - eq_(process_key_no_skip('Loorngr'), 'Lôngr') - eq_(process_key_no_skip('LOorngr'), 'LÔngr') - eq_(process_key_no_skip('DDoongd'), 'Dôngd') - eq_(process_key_no_skip('DDuowngd'), 'Dươngd') - eq_(process_key_no_skip('Duowngw'), 'Duongw') - - def test_non_vn(self): - def atomic(word): - eq_(process_sequence(word), word) - - tests = [ - "system", - "Virtualbox", - "VMWare", - "Microsoft", - "Google", - "Installation", - "teardown", - "generators", - "event-driven", - "flow" - ] - - for test in tests: - yield atomic, test - - eq_(process_sequence("aans."), "ấn.") - eq_(process_sequence("aans]"), "ấn]") - # eq_(process_sequence("aans.tuongwj"), "ấn.tượng") - eq_(process_sequence("gi[f"), "giờ") - # eq_(process_sequence("taojc"), "taojc") - - def test_with_separator(self): - eq_(process_sequence('con meof dideen'), 'con mèo điên') - eq_(process_sequence('con.meof'), 'con.mèo') - eq_(process_sequence('con?meof'), 'con?mèo') From d67eebbdcbd54746c76d4ff6f3c17d9ac573e0d5 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 18:08:47 +0700 Subject: [PATCH 12/15] More tests and minor bug fix --- bogo/core.py | 3 +-- bogo/test/test_core.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index ae8e288..be1ab07 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -249,7 +249,7 @@ def __init__(self, typing_rule): self.syllable = Syllable('', '', '') def raw_string(self): - "".join([trans.key for trans in self.transformations]) + return "".join([trans.key for trans in self.transformations]) def result(self): return self.syllable.as_string() @@ -265,4 +265,3 @@ def add_key(self, key): self.transformations.append(transformation) return self.syllable.as_string() - diff --git a/bogo/test/test_core.py b/bogo/test/test_core.py index aa328a8..cb5df4e 100644 --- a/bogo/test/test_core.py +++ b/bogo/test/test_core.py @@ -74,3 +74,20 @@ def test_transformations_from_key_non_rule_key(self): eq_(len(trans_list), 1) eq_(type(trans_list[0]), core.AddCharTransformation) + + +class TestBoGo: + def test_add_key_add_char(self): + b = core.BoGo(core.Rule({})) + b.add_key('a') + + eq_(b.result(), 'a') + eq_(b.raw_string(), 'a') + + def test_add_key_add_tone(self): + b = core.BoGo(core.Rule({'s': '/'})) + b.add_key('a') + b.add_key('s') + + eq_(b.result(), 'á') + eq_(b.raw_string(), 'as') From de670c78cf211a9a3de1c052c0dcff1978df4d67 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 18:09:03 +0700 Subject: [PATCH 13/15] Change README to reflect the new object oriented design --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index d3cfcb5..2ee3e5a 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,7 @@ BoGo [![Build Status](https://travis-ci.org/BoGoEngine/bogo-python.svg?branch=master)](https://travis-ci.org/BoGoEngine/bogo-python) [![Coverage Status](https://coveralls.io/repos/BoGoEngine/bogo-python/badge.png?branch=master)](https://coveralls.io/r/BoGoEngine/bogo-python?branch=master) -BoGo is a Vietnamese input method conversion library for Python. This library -is intentionally functional with no internal state and side-effect. +BoGo is a Vietnamese input method conversion library for Python. Installation ------------ From ae01622ceb2a5cb6c22f1b81d85aa25754b366fe Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 6 Jul 2014 18:18:49 +0700 Subject: [PATCH 14/15] Rename --- bogo/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index be1ab07..c2a98e4 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -127,7 +127,7 @@ def process_sequence(sequence, i.e. process_sequence('con meof.ddieen') should work. """ result = "" - result_parts = [] + result_chunks = [] if rules is None: rules = get_telex_definition() @@ -137,14 +137,14 @@ def process_sequence(sequence, for key in sequence: if key not in accepted_chars: - result_parts.append(result) - result_parts.append(key) + result_chunks.append(result) + result_chunks.append(key) result = "" else: result = bg.add_key(key) - result_parts.append(result) - return ''.join(result_parts) + result_chunks.append(result) + return ''.join(result_chunks) class Transformation: From 570c9cb92cd8fc977fcb33c9a616fc86db44b35a Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 10 Jul 2014 18:37:39 +0700 Subject: [PATCH 15/15] Implement the best_transformation() method and undo transformations --- bogo/core.py | 135 +++++++++++++++++++++++++++++++++++++---- bogo/test/test_core.py | 18 +++++- 2 files changed, 139 insertions(+), 14 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index c2a98e4..2a3f300 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -25,9 +25,8 @@ """ from __future__ import unicode_literals -from bogo.validation import is_valid_combination from bogo.syllable import Syllable -from bogo import utils, accent, mark +from bogo import accent, mark, validation import sys import string @@ -140,8 +139,11 @@ def process_sequence(sequence, result_chunks.append(result) result_chunks.append(key) result = "" + bg = BoGo(rules) else: result = bg.add_key(key) + if not validation.is_valid_string(result): + result = bg.raw_string() result_chunks.append(result) return ''.join(result_chunks) @@ -165,6 +167,11 @@ def perform(self, syllable): return syllable.append_char(self.char) +class ByPassTransformation(AddCharTransformation): + def __init__(self, key): + super(ByPassTransformation, self).__init__(key, key) + + class AddToneMarkTransformation(Transformation): def __init__(self, key, tone): super(AddToneMarkTransformation, self).__init__(key) @@ -183,6 +190,54 @@ def perform(self, syllable): return mark.add_mark(syllable, self.mark) +class UndoAddToneMarkTransformation(Transformation): + def __init__(self, other_trans): + super(UndoAddToneMarkTransformation, self).__init__(other_trans.key) + self.tone = other_trans.tone + + def perform(self, syllable): + # A proper syllable has exactly one tone. Undoing the + # last tone means removing all tones. + return accent.add_accent(syllable, Accent.NONE) + + +class UndoAddCharMarkTransformation(Transformation): + def __init__(self, other_trans): + super(UndoAddCharMarkTransformation, self).__init__(other_trans.key) + self.mark = other_trans.mark + self.other_trans = other_trans + + def perform(self, syllable): + new_initial_consonant, new_vowel, new_final_consonant = syllable + + if self.mark == Mark.BAR: + new_initial_consonant = syllable.initial_consonant[:-1] + \ + mark.add_mark_char(syllable.initial_consonant[-1:], Mark.NONE) + else: + new_vowel = "".join([mark.add_mark_char(c, Mark.NONE) + for c in new_vowel]) + + return Syllable(new_initial_consonant, new_vowel, new_final_consonant) + + +class UndoAddCharTransformation(Transformation): + def __init__(self, other_trans): + super(UndoAddCharTransformation, self).__init__(other_trans.key) + + def perform(self, syllable): + # Just remove the last char + return Syllable.new_from_string(syllable.as_string()[:-1]) + + +def undo_transformation_of(trans): + if type(trans) is AddToneMarkTransformation: + return UndoAddToneMarkTransformation(trans) + elif type(trans) is AddCharMarkTransformation: + return UndoAddCharMarkTransformation(trans) + elif type(trans) is AddCharTransformation: + return UndoAddCharTransformation(trans) + + class Rule: mark_action = { '^': Mark.HAT, @@ -212,6 +267,7 @@ def parse_rule_action(rule_action, key): if rule_action[0] == '<': # <ư + print(rule_action) trans = AddCharTransformation(key, rule_action[1]) # elif rule_action[0] == "_": # # _a^ @@ -234,12 +290,19 @@ def parse_rule_action(rule_action, key): return trans - def transformations_from_key(self, key): + def possible_transformations_from(self, key): if key in self.rule_dict: - return [self.parse_rule_action(rule_action, key) - for rule_action in self.rule_dict[key]] + if type(self.rule_dict[key]) is list: + transformations = \ + [self.parse_rule_action(rule_action, key) + for rule_action in self.rule_dict[key]] + else: + transformations = \ + [self.parse_rule_action(self.rule_dict[key], key)] + + return transformations + [ByPassTransformation(key)] else: - return [AddCharTransformation(key, key)] + return [ByPassTransformation(key)] class BoGo: @@ -247,6 +310,7 @@ def __init__(self, typing_rule): self.rule = typing_rule self.transformations = [] self.syllable = Syllable('', '', '') + self.undone_keys = [] def raw_string(self): return "".join([trans.key for trans in self.transformations]) @@ -254,14 +318,61 @@ def raw_string(self): def result(self): return self.syllable.as_string() - def best_transformation(self, transformations): - return transformations[0] + def best_transformation(self, key): + if key in self.undone_keys: + return ByPassTransformation(key) + + transformations = self.rule.possible_transformations_from(key) + + # Check if the same key has been used before in a transformation. + # If it has then undo it. + for trans in self.transformations: + if type(trans) is not ByPassTransformation and \ + trans.key == key: + self.undone_keys.append(key) + return undo_transformation_of(trans) + + for transformation in transformations: + if type(transformation) is AddToneMarkTransformation: + if self.syllable.vowel is not "": + return transformation + elif type(transformation) is AddCharMarkTransformation: + if transformation.mark == Mark.HAT: + if self.syllable.vowel is not "" and \ + self.syllable.vowel[-1] in \ + ('a', 'ă', 'â', 'o', 'ô', 'ơ', 'e', 'ê'): + return transformation + elif transformation.mark == Mark.BREVE: + if self.syllable.vowel is not "" and \ + mark.strip(self.syllable.vowel) is 'a': + return transformation + elif transformation.mark == Mark.HORN: + v = mark.strip(self.syllable.vowel) + if v is not "" and \ + ('u' in v or 'o' in v): + return transformation + elif transformation.mark == Mark.BAR: + ic = self.syllable.initial_consonant + if ic is not "" and \ + mark.strip(ic)[-1] is 'd': + return transformation + elif type(transformation) is AddCharTransformation: + return transformation + elif type(transformation) is ByPassTransformation: + return transformation def add_key(self, key): - transformation = self.best_transformation( - self.rule.transformations_from_key(key)) + transformation = self.best_transformation(key) + # Recreate the syllable to fix the "gio" key sequence. + # After the 2 first keys, the syllable will have the form + # of ('g', 'i', ''). We want it to have the form + # ('gi', 'o', '') after the 3rd key. + self.syllable = Syllable.new_from_string(self.result()) self.syllable = transformation.perform(self.syllable) - self.transformations.append(transformation) - return self.syllable.as_string() + if transformation.__class__.__name__.startswith("Undo"): + self.syllable = self.syllable.append_char(key) + + self.transformations.append(transformation) + return self.result() diff --git a/bogo/test/test_core.py b/bogo/test/test_core.py index cb5df4e..dc4d414 100644 --- a/bogo/test/test_core.py +++ b/bogo/test/test_core.py @@ -64,16 +64,17 @@ def test_transformations_from_key_rule_key(self): rule = core.Rule({'w': ['*', '(']}) trans_list = rule.transformations_from_key('w') - eq_(len(trans_list), 2) + eq_(len(trans_list), 3) eq_(type(trans_list[0]), core.AddCharMarkTransformation) eq_(type(trans_list[1]), core.AddCharMarkTransformation) + eq_(type(trans_list[2]), core.ByPassTransformation) def test_transformations_from_key_non_rule_key(self): rule = core.Rule({'w': ['*', '(']}) trans_list = rule.transformations_from_key('a') eq_(len(trans_list), 1) - eq_(type(trans_list[0]), core.AddCharTransformation) + eq_(type(trans_list[0]), core.ByPassTransformation) class TestBoGo: @@ -84,6 +85,14 @@ def test_add_key_add_char(self): eq_(b.result(), 'a') eq_(b.raw_string(), 'a') + def test_add_key_add_mark(self): + b = core.BoGo(core.Rule({'a': '^'})) + b.add_key('a') + b.add_key('a') + + eq_(b.result(), 'â') + eq_(b.raw_string(), 'aa') + def test_add_key_add_tone(self): b = core.BoGo(core.Rule({'s': '/'})) b.add_key('a') @@ -91,3 +100,8 @@ def test_add_key_add_tone(self): eq_(b.result(), 'á') eq_(b.raw_string(), 'as') + + +class TestProcessSequence: + def test_normal_typing(self): + eq_(core.process_sequence('as'), 'á')