diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..237ca0a --- /dev/null +++ b/NOTES.md @@ -0,0 +1,7 @@ +Workflow for releasing a new version: + +1. Update the version in `setup.py` (including the `download_url`). Commit it with the message "Bump version to x.x.x" +2. Tag the new commit as vx.x.x (v1.1, v1.0.1) +3. Push to GitHub +4. Update the releases page: https://github.com/BoGoEngine/bogo-python/releases +5. Upload to PyPI: https://docs.python.org/2/distutils/packageindex.html#package-index \ No newline at end of file diff --git a/README.md b/README.md index a2d835e..2ee3e5a 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,7 @@ BoGo [![Build Status](https://travis-ci.org/BoGoEngine/bogo-python.svg?branch=master)](https://travis-ci.org/BoGoEngine/bogo-python) [![Coverage Status](https://coveralls.io/repos/BoGoEngine/bogo-python/badge.png?branch=master)](https://coveralls.io/r/BoGoEngine/bogo-python?branch=master) -BoGo is a Vietnamese input method conversion library for Python. This library -is intentionally functional with no internal state and side-effect. +BoGo is a Vietnamese input method conversion library for Python. Installation ------------ @@ -43,3 +42,8 @@ Some functions from `bogo.core` are exported to package toplevel: - `get_vni_definition()` BoGo is extensively tested with Python 2.7, Python 3.2 and Python 3.3. + +Etymology +--------- + +BoGo, or more precisely *bộ gõ*, literally means *input method* in Vietnamese. 
def _accepted_chars(rules):
    """
    Returns the set of characters made of the ASCII letters, every key
    of `rules`, the characters in utils.VOWELS and "đ".
    """
    # string.ascii_letters exists on both Python 2 and Python 3 and, unlike
    # string.lowercase/string.uppercase, does not vary with the locale, so
    # no interpreter-version branch is needed.
    return set(
        string.ascii_letters +
        ''.join(rules.keys()) +
        utils.VOWELS +
        "đ")


def handle_backspace(converted_string, raw_sequence, im_rules=None):
    """
    Returns a new raw_sequence after a backspace. This raw_sequence should
    be pushed back to process_sequence().

    Args:
        converted_string: the converted text whose last character is
            being deleted.
        raw_sequence: the key sequence that was typed so far.
        im_rules: the input method definition; defaults to
            get_telex_definition() when None.

    If converted_string is empty, raw_sequence is returned unchanged.
    If the deleted character carries a mark or an accent and no suffix
    of raw_sequence converts to it, raw_sequence is also returned
    unchanged.
    """
    # I can't find a simple explanation for this, so
    # I hope this example can help clarify it:
    #
    # handle_backspace(thương, thuwongw) -> thuwonw
    # handle_backspace(thươn, thuwonw) -> thuwow
    # handle_backspace(thươ, thuwow) -> thuw
    # handle_backspace(thươ, thuw) -> th
    #
    # The algorithm for handle_backspace was contributed by @hainp.

    if im_rules is None:
        im_rules = get_telex_definition()

    # Nothing has been converted, so there is nothing to delete.
    if not converted_string:
        return raw_sequence

    deleted_char = converted_string[-1]

    _accent = accent.get_accent_char(deleted_char)
    _mark = mark.get_mark_char(deleted_char)

    if _mark or _accent:
        # Walk backwards to find where the trailing run of IM keys and
        # plain vowels/"d" begins. `start` stops at 0 when the whole
        # sequence belongs to the run, so the scan below begins with the
        # complete run and not with the last key alone.
        start = len(raw_sequence)
        while start > 0:
            key = raw_sequence[start - 1]
            if key not in im_rules and key not in "aeiouyd":
                break
            start -= 1

        # Try to find a subsequence from that run that can be
        # converted to the deleted_char, longest candidate first.
        for cut in range(start, len(raw_sequence)):
            if process_sequence(raw_sequence[cut:], im_rules) == deleted_char:
                # Delete that subsequence
                return raw_sequence[:cut]

        # No suffix reproduces deleted_char: leave the sequence untouched.
        return raw_sequence

    index = raw_sequence.rfind(deleted_char)
    if index == -1:
        # deleted_char was not typed literally. Slicing with -1 would
        # duplicate the sequence, so drop the last key instead.
        return raw_sequence[:-1]

    return raw_sequence[:index] + raw_sequence[index + 1:]
eq_(handle_backspace('sý', 'sys'), 's') + + def test_delete_two_im_keys(self): + eq_(handle_backspace('bớ', 'bows'), 'b') + + def test_non_im_key_before_im_key(self): + eq_(handle_backspace('bân', 'bana'), 'baa') + + def test_im_insert_key(self): + eq_(handle_backspace('bư', 'bw'), 'b') + eq_(handle_backspace('boư', 'bow'), 'bo') + eq_(handle_backspace('bá', 'bafjxrs'), 'b') + eq_(handle_backspace('bá', 'ba23451', + bogo.core.get_vni_definition()), 'b') + + # FIXME + # eq_(handle_backspace('dườ', 'duwfow'), 'duw') + # eq_(handle_backspace('uyể', 'uryee'), 'uy') + + def test_single_im_key_two_vowels(self): + eq_(handle_backspace('bươ', 'buow'), 'bu') \ No newline at end of file diff --git a/setup.py b/setup.py index c55104a..e1d04c2 100644 --- a/setup.py +++ b/setup.py @@ -5,12 +5,12 @@ setup( name='bogo', packages=['bogo'], - version='1.0.1', + version='1.1', description='Library for implementing Vietnamese input method editors with a purely functional interface.', author='Trung Ngo', author_email='ndtrung4419@gmail.com', url='https://github.com/BoGoEngine/bogo-python', - download_url='https://github.com/BoGoEngine/bogo-python/archive/v1.0.tar.gz', + download_url='https://github.com/BoGoEngine/bogo-python/archive/v1.1.tar.gz', keywords=['vietnamese'], classifiers=[ "Programming Language :: Python",