From edb6486b8c9a4c007534cd237686ae9e3ef6dc75 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Wed, 2 Jul 2014 19:29:07 +0700 Subject: [PATCH 01/12] Explain the etymology for BoGo --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index a2d835e..d3cfcb5 100644 --- a/README.md +++ b/README.md @@ -43,3 +43,8 @@ Some functions from `bogo.core` are exported to package toplevel: - `get_vni_definition()` BoGo is extensively tested with Python 2.7, Python 3.2 and Python 3.3. + +Etymology +--------- + +BoGo, or more precisely *bộ gõ*, literally means *input method* in Vietnamese. From 07fdbbaa357725e1c0a67ce78e6fe24b70e83a0b Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 10 Jul 2014 16:21:07 +0700 Subject: [PATCH 02/12] _accepted_chars() accepts Vietnamese characters with tone and mark as well ISSUE: https://github.com/BoGoEngine/bogo-python/issues/17 --- bogo/core.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index 697e3ba..71ca185 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -101,16 +101,14 @@ def get_vni_definition(): def _accepted_chars(rules): if sys.version_info[0] > 2: - accepted_chars = \ - string.ascii_letters + \ - ''.join(rules.keys()) + ascii_letters = \ + string.ascii_letters else: - accepted_chars = \ + ascii_letters = \ string.lowercase + \ - string.uppercase + \ - ''.join(rules.keys()) + string.uppercase - return accepted_chars + return set(ascii_letters + ''.join(rules.keys()) + utils.VOWELS + "đ") def process_sequence(sequence, From 24c77d5df6e84092968ecac3f86949d878e4b698 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 10 Jul 2014 16:23:44 +0700 Subject: [PATCH 03/12] Add tests --- bogo/test/test_engine.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py index 35b871f..135c609 100644 --- a/bogo/test/test_engine.py +++ b/bogo/test/test_engine.py @@ -189,3 +189,7 @@ def 
test_with_separator(self): eq_(process_sequence('con meof dideen'), 'con mèo điên') eq_(process_sequence('con.meof'), 'con.mèo') eq_(process_sequence('con?meof'), 'con?mèo') + + def test_change_tone(self): + eq_(process_sequence('meofs'), 'méo') + eq_(process_sequence('mèos'), 'méo') \ No newline at end of file From 06977b53985070b7fa53fd0c3c4e0c3ea2d15b5e Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 7 Aug 2014 00:51:59 +0700 Subject: [PATCH 04/12] Implement handle_backspace() --- bogo/__init__.py | 3 ++- bogo/core.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/bogo/__init__.py b/bogo/__init__.py index d507d7c..a147ff5 100644 --- a/bogo/__init__.py +++ b/bogo/__init__.py @@ -28,4 +28,5 @@ process_key, \ process_sequence, \ get_telex_definition, \ - get_vni_definition + get_vni_definition, \ + handle_backspace diff --git a/bogo/core.py b/bogo/core.py index 71ca185..8b150b8 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -487,3 +487,29 @@ def atomic_check(action): accent.remove_accent_char(comps[1][-1])) # ơ, ư return any(map(atomic_check, action_list)) + + +def handle_backspace(converted_string, raw_sequence): + """ + Returns a new converted_string and a new raw_sequence + after a backspace. + """ + # I can't find a simple explanation for this, so + # I hope this example can help clarify it: + # + # handle_backspace(thương, thuwongw) -> (thươn, thuwonw) + # handle_backspace(thươn, thuwonw) -> (thươ, thuwow) + # handle_backspace(thươ, thuwow) -> (thư, thuw) + # handle_backspace(thươ, thuw) -> (th, th) + # + # The algorithm for handle_backspace was contributed by @hainp. 
+ + deleted_char = converted_string[-1] + converted_string = converted_string[:-1] + + index = raw_sequence.rfind(deleted_char) + raw_sequence = raw_sequence[:-2] if index < 0 else \ + raw_sequence[:index] + \ + raw_sequence[(index + 1):] + + return converted_string, raw_sequence From b27827fd35ca72011011ba061c0687786c2c14c5 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 7 Aug 2014 19:32:20 +0700 Subject: [PATCH 05/12] Better handle_backspace --- bogo/core.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index 8b150b8..1ae5460 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -507,9 +507,15 @@ def handle_backspace(converted_string, raw_sequence): deleted_char = converted_string[-1] converted_string = converted_string[:-1] - index = raw_sequence.rfind(deleted_char) - raw_sequence = raw_sequence[:-2] if index < 0 else \ - raw_sequence[:index] + \ - raw_sequence[(index + 1):] + _accent = accent.get_accent_char(deleted_char) + _mark = mark.get_mark_char(deleted_char) + + if _mark and _accent: + raw_sequence = raw_sequence[:-3] + elif _mark or _accent: + raw_sequence = raw_sequence[:-2] + else: + index = raw_sequence.rfind(deleted_char) + raw_sequence = raw_sequence[:index] + raw_sequence[(index + 1):] return converted_string, raw_sequence From 5305d89087fb7387caa2a542bc42f2a0e26a506a Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 7 Aug 2014 19:53:21 +0700 Subject: [PATCH 06/12] Add tests for handle_backspace --- bogo/test/test_engine.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py index 135c609..7e71190 100644 --- a/bogo/test/test_engine.py +++ b/bogo/test/test_engine.py @@ -6,7 +6,7 @@ from functools import partial import codecs -from bogo.core import _Action, _get_action, process_sequence +from bogo.core import _Action, _get_action, process_sequence, handle_backspace from bogo.mark import Mark import 
os @@ -192,4 +192,21 @@ def test_with_separator(self): def test_change_tone(self): eq_(process_sequence('meofs'), 'méo') - eq_(process_sequence('mèos'), 'méo') \ No newline at end of file + eq_(process_sequence('mèos'), 'méo') + + +class TestHandleBackspace(): + + def test_delete_non_im_key(self): + eq_(handle_backspace('an', 'an'), ('a', 'a')) + eq_(handle_backspace('a', 'a'), ('', '')) + + def test_delete_one_im_key(self): + eq_(handle_backspace('bà', 'baf'), ('b', 'b')) + eq_(handle_backspace('bâ', 'baa'), ('b', 'b')) + + def test_delete_two_im_keys(self): + eq_(handle_backspace('bớ', 'bows'), ('b', 'b')) + + def test_non_im_key_before_im_key(self): + eq_(handle_backspace('bân', 'bana'), ('bâ', 'baa')) \ No newline at end of file From 8a6ca0558cbb998d0dfeb320aaee31c5c6e1bb83 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 7 Aug 2014 20:05:15 +0700 Subject: [PATCH 07/12] Bump version to 1.1 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c55104a..e1d04c2 100644 --- a/setup.py +++ b/setup.py @@ -5,12 +5,12 @@ setup( name='bogo', packages=['bogo'], - version='1.0.1', + version='1.1', description='Library for implementing Vietnamese input method editors with a purely functional interface.', author='Trung Ngo', author_email='ndtrung4419@gmail.com', url='https://github.com/BoGoEngine/bogo-python', - download_url='https://github.com/BoGoEngine/bogo-python/archive/v1.0.tar.gz', + download_url='https://github.com/BoGoEngine/bogo-python/archive/v1.1.tar.gz', keywords=['vietnamese'], classifiers=[ "Programming Language :: Python", From fa4581febcd963032a7057d3b9377854f170fa59 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Thu, 7 Aug 2014 20:14:42 +0700 Subject: [PATCH 08/12] Add notes for maintainer --- NOTES.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 NOTES.md diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..237ca0a --- /dev/null +++ b/NOTES.md @@ -0,0 +1,7 @@ 
+Workflow for releasing a new version: + +1. Update the version in `setup.py` (even the `download_url`). Commit it with message "Bump version to x.x.x" +2. Tag the new commit as vx.x.x (v1.1, v1.0.1) +3. Push to GitHub +4. Update the releases page: https://github.com/BoGoEngine/bogo-python/releases +5. Upload to PyPI: https://docs.python.org/2/distutils/packageindex.html#package-index \ No newline at end of file From f7141c0b633d508510fd4e1acf407e096f5991f4 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 10 Aug 2014 00:47:12 +0700 Subject: [PATCH 09/12] Remove the note about being functional --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index d3cfcb5..2ee3e5a 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,7 @@ BoGo [![Build Status](https://travis-ci.org/BoGoEngine/bogo-python.svg?branch=master)](https://travis-ci.org/BoGoEngine/bogo-python) [![Coverage Status](https://coveralls.io/repos/BoGoEngine/bogo-python/badge.png?branch=master)](https://coveralls.io/r/BoGoEngine/bogo-python?branch=master) -BoGo is a Vietnamese input method conversion library for Python. This library -is intentionally functional with no internal state and side-effect. +BoGo is a Vietnamese input method conversion library for Python. 
Installation ------------ From d2201ad37162ce59d539650ab863027fc0c4b4f3 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 10 Aug 2014 11:52:48 +0700 Subject: [PATCH 10/12] [API-BREAKAGE] More flexible handle_backspace() --- bogo/core.py | 36 +++++++++++++++++++++++++++++++----- bogo/test/test_engine.py | 38 +++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index 1ae5460..23aa26c 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -489,7 +489,7 @@ def atomic_check(action): return any(map(atomic_check, action_list)) -def handle_backspace(converted_string, raw_sequence): +def handle_backspace(converted_string, raw_sequence, im_rules=None): """ Returns a new converted_string and a new raw_sequence after a backspace. @@ -504,16 +504,42 @@ def handle_backspace(converted_string, raw_sequence): # # The algorithm for handle_backspace was contributed by @hainp. + if im_rules == None: + im_rules = get_telex_definition() + deleted_char = converted_string[-1] converted_string = converted_string[:-1] _accent = accent.get_accent_char(deleted_char) _mark = mark.get_mark_char(deleted_char) - if _mark and _accent: - raw_sequence = raw_sequence[:-3] - elif _mark or _accent: - raw_sequence = raw_sequence[:-2] + if _mark or _accent: + # Find a sequence of IM keys at the end of + # raw_sequence + + ime_keys_at_end = "" + len_raw_sequence = len(raw_sequence) + i = len_raw_sequence - 1 + + while i >= 0: + if raw_sequence[i] not in im_rules and \ + raw_sequence[i] not in "aeioud": + i += 1 + break + else: + ime_keys_at_end = raw_sequence[i] + ime_keys_at_end + i -= 1 + + # Try to find a subsequence from that sequence + # that can be converted to the deleted_char + k = 0 + while k < len_raw_sequence: + print(raw_sequence[i + k:]) + if process_sequence(raw_sequence[i + k:], im_rules) == deleted_char: + # Delete that subsequence + raw_sequence = raw_sequence[:i + k] + break + k += 1 else: index = 
raw_sequence.rfind(deleted_char) raw_sequence = raw_sequence[:index] + raw_sequence[(index + 1):] diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py index 7e71190..657e975 100644 --- a/bogo/test/test_engine.py +++ b/bogo/test/test_engine.py @@ -6,6 +6,7 @@ from functools import partial import codecs +import bogo from bogo.core import _Action, _get_action, process_sequence, handle_backspace from bogo.mark import Mark import os @@ -197,16 +198,27 @@ def test_change_tone(self): class TestHandleBackspace(): - def test_delete_non_im_key(self): - eq_(handle_backspace('an', 'an'), ('a', 'a')) - eq_(handle_backspace('a', 'a'), ('', '')) - - def test_delete_one_im_key(self): - eq_(handle_backspace('bà', 'baf'), ('b', 'b')) - eq_(handle_backspace('bâ', 'baa'), ('b', 'b')) - - def test_delete_two_im_keys(self): - eq_(handle_backspace('bớ', 'bows'), ('b', 'b')) - - def test_non_im_key_before_im_key(self): - eq_(handle_backspace('bân', 'bana'), ('bâ', 'baa')) \ No newline at end of file + def test_delete_non_im_key(self): + eq_(handle_backspace('an', 'an'), ('a', 'a')) + eq_(handle_backspace('a', 'a'), ('', '')) + + def test_delete_one_im_key(self): + eq_(handle_backspace('bà', 'baf'), ('b', 'b')) + eq_(handle_backspace('bâ', 'baa'), ('b', 'b')) + + def test_delete_two_im_keys(self): + eq_(handle_backspace('bớ', 'bows'), ('b', 'b')) + + def test_non_im_key_before_im_key(self): + eq_(handle_backspace('bân', 'bana'), ('bâ', 'baa')) + + def test_im_insert_key(self): + eq_(handle_backspace('bư', 'bw'), ('b', 'b')) + eq_(handle_backspace('boư', 'bow'), ('bo', 'bo')) + eq_(handle_backspace('bá', 'bafjxrs'), ('b', 'b')) + eq_(handle_backspace('bá', 'ba23451', + bogo.core.get_vni_definition()), ('b', 'b')) + + # FIXME + # eq_(handle_backspace('dườ', 'duwfow'), ('dư', 'duw')) + # eq_(handle_backspace('uyể', 'uryee'), ('uy', 'uy')) From f408037b2ee836101c5e6a0c2fea79ff52cbcc9d Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 10 Aug 2014 12:28:18 +0700 Subject: [PATCH 
11/12] [API-BREAKAGE] handle_backspace() now only returns a new raw_sequence --- bogo/core.py | 15 +++++++-------- bogo/test/test_engine.py | 27 +++++++++++++++------------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index 23aa26c..cbe77cb 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -491,16 +491,16 @@ def atomic_check(action): def handle_backspace(converted_string, raw_sequence, im_rules=None): """ - Returns a new converted_string and a new raw_sequence - after a backspace. + Returns a new raw_sequence after a backspace. This raw_sequence should + be pushed back to process_sequence(). """ # I can't find a simple explanation for this, so # I hope this example can help clarify it: # - # handle_backspace(thương, thuwongw) -> (thươn, thuwonw) - # handle_backspace(thươn, thuwonw) -> (thươ, thuwow) - # handle_backspace(thươ, thuwow) -> (thư, thuw) - # handle_backspace(thươ, thuw) -> (th, th) + # handle_backspace(thương, thuwongw) -> thuwonw + # handle_backspace(thươn, thuwonw) -> thuwow + # handle_backspace(thươ, thuwow) -> thuw + # handle_backspace(thư, thuw) -> th # # The algorithm for handle_backspace was contributed by @hainp. 
@@ -508,7 +508,6 @@ def handle_backspace(converted_string, raw_sequence, im_rules=None): im_rules = get_telex_definition() deleted_char = converted_string[-1] - converted_string = converted_string[:-1] _accent = accent.get_accent_char(deleted_char) _mark = mark.get_mark_char(deleted_char) @@ -544,4 +543,4 @@ def handle_backspace(converted_string, raw_sequence, im_rules=None): index = raw_sequence.rfind(deleted_char) raw_sequence = raw_sequence[:index] + raw_sequence[(index + 1):] - return converted_string, raw_sequence + return raw_sequence diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py index 657e975..e829937 100644 --- a/bogo/test/test_engine.py +++ b/bogo/test/test_engine.py @@ -199,26 +199,29 @@ def test_change_tone(self): class TestHandleBackspace(): def test_delete_non_im_key(self): - eq_(handle_backspace('an', 'an'), ('a', 'a')) - eq_(handle_backspace('a', 'a'), ('', '')) + eq_(handle_backspace('an', 'an'), 'a') + eq_(handle_backspace('a', 'a'), '') def test_delete_one_im_key(self): - eq_(handle_backspace('bà', 'baf'), ('b', 'b')) - eq_(handle_backspace('bâ', 'baa'), ('b', 'b')) + eq_(handle_backspace('bà', 'baf'), 'b') + eq_(handle_backspace('bâ', 'baa'), 'b') def test_delete_two_im_keys(self): - eq_(handle_backspace('bớ', 'bows'), ('b', 'b')) + eq_(handle_backspace('bớ', 'bows'), 'b') def test_non_im_key_before_im_key(self): - eq_(handle_backspace('bân', 'bana'), ('bâ', 'baa')) + eq_(handle_backspace('bân', 'bana'), 'baa') def test_im_insert_key(self): - eq_(handle_backspace('bư', 'bw'), ('b', 'b')) - eq_(handle_backspace('boư', 'bow'), ('bo', 'bo')) - eq_(handle_backspace('bá', 'bafjxrs'), ('b', 'b')) + eq_(handle_backspace('bư', 'bw'), 'b') + eq_(handle_backspace('boư', 'bow'), 'bo') + eq_(handle_backspace('bá', 'bafjxrs'), 'b') eq_(handle_backspace('bá', 'ba23451', - bogo.core.get_vni_definition()), ('b', 'b')) + bogo.core.get_vni_definition()), 'b') # FIXME - # eq_(handle_backspace('dườ', 'duwfow'), ('dư', 'duw')) - # 
eq_(handle_backspace('uyể', 'uryee'), ('uy', 'uy')) + # eq_(handle_backspace('dườ', 'duwfow'), 'duw') + # eq_(handle_backspace('uyể', 'uryee'), 'uy') + + def test_single_im_key_two_vowels(self): + eq_(handle_backspace('bươ', 'buow'), 'bu') \ No newline at end of file From 9b85329a408ded4cead3539cecba12984d5d7650 Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sun, 10 Aug 2014 12:39:26 +0700 Subject: [PATCH 12/12] Fix a bug in which handle_backspace misses "y" --- bogo/core.py | 3 +-- bogo/test/test_engine.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bogo/core.py b/bogo/core.py index cbe77cb..8e16783 100644 --- a/bogo/core.py +++ b/bogo/core.py @@ -522,7 +522,7 @@ def handle_backspace(converted_string, raw_sequence, im_rules=None): while i >= 0: if raw_sequence[i] not in im_rules and \ - raw_sequence[i] not in "aeioud": + raw_sequence[i] not in "aeiouyd": i += 1 break else: @@ -533,7 +533,6 @@ def handle_backspace(converted_string, raw_sequence, im_rules=None): # that can be converted to the deleted_char k = 0 while k < len_raw_sequence: - print(raw_sequence[i + k:]) if process_sequence(raw_sequence[i + k:], im_rules) == deleted_char: # Delete that subsequence raw_sequence = raw_sequence[:i + k] diff --git a/bogo/test/test_engine.py b/bogo/test/test_engine.py index e829937..db1fa70 100644 --- a/bogo/test/test_engine.py +++ b/bogo/test/test_engine.py @@ -205,6 +205,7 @@ def test_delete_non_im_key(self): def test_delete_one_im_key(self): eq_(handle_backspace('bà', 'baf'), 'b') eq_(handle_backspace('bâ', 'baa'), 'b') + eq_(handle_backspace('sý', 'sys'), 's') def test_delete_two_im_keys(self): eq_(handle_backspace('bớ', 'bows'), 'b')