forked from speechbrain/speechbrain
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdictionaries.py
More file actions
122 lines (94 loc) · 3.69 KB
/
dictionaries.py
File metadata and controls
122 lines (94 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Dictionary utilities, e.g. synonym dictionaries.
Authors
* Sylvain de Langen 2024"""
import json
from collections import defaultdict
from typing import Iterable
class SynonymDictionary:
    """Loads sets of synonym words and lets you look up if two words are
    synonyms.

    This could, for instance, be used to check for equality in the case of two
    spellings of the same word when normalization might be unsuitable.

    Synonyms are not considered to be transitive:
    If A is a synonym of B and B is a synonym of C, then A is NOT considered a
    synonym of C unless they are added in the same synonym set.

    Internally, `word_map` maps each known word to the set of words it was
    declared a synonym of. Lookups never add entries to that map; only
    :meth:`add_synonym_set` does."""

    def __init__(self):
        # word -> set of its direct synonyms (the word itself is excluded).
        # A defaultdict is used so that `add_synonym_set` can merge into a
        # possibly-missing entry; read paths must use `.get` to avoid
        # implicitly creating entries.
        self.word_map = defaultdict(set)

    @staticmethod
    def from_json_file(file) -> "SynonymDictionary":
        """Parses an opened file as JSON, where the top level structure is a
        list of sets of synonyms (i.e. words that are all synonyms with each
        other), e.g. `[ ["hello", "hi"], ["say", "speak", "talk"] ]`.

        Arguments
        ---------
        file
            File object that supports reading (e.g. an `open`ed file)

        Returns
        -------
        SynonymDictionary
            Synonym dictionary from the parsed JSON file with all synonym sets
            added.

        Raises
        ------
        ValueError
            If an entry of the top level structure is not a list.
        """
        parsed = json.load(file)

        synonym_dict = SynonymDictionary()

        for entry in parsed:
            # Guard clause: reject anything that is not a JSON array before
            # registering it, so a malformed file fails loudly.
            if not isinstance(entry, list):
                raise ValueError(
                    f"Unexpected entry type {type(entry)} in synonyms JSON (expected list)"
                )
            synonym_dict.add_synonym_set(entry)

        return synonym_dict

    @staticmethod
    def from_json_path(path) -> "SynonymDictionary":
        """Opens a file and parses it as JSON, with otherwise the same semantics
        as :meth:`~SynonymDictionary.from_json_file`, which uses an opened file.

        Arguments
        ---------
        path : str
            Path to the JSON file

        Returns
        -------
        SynonymDictionary
            Synonym dictionary from the parsed JSON file with all synonym sets
            added.
        """
        with open(path, "r", encoding="utf8") as f:
            return SynonymDictionary.from_json_file(f)

    def add_synonym_set(self, words: Iterable[str]) -> None:
        """Add a set of words that are all synonyms with each other.

        Synonyms already registered for any of these words are kept; the new
        ones are merged in.

        Arguments
        ---------
        words : Iterable[str]
            List of words that should be defined as synonyms to each other"""
        word_set = set(words)

        for word in word_set:
            # Every word maps to all the *other* words of the set, which makes
            # the relation symmetric without ever listing a word as its own
            # synonym.
            self.word_map[word].update(word_set - {word})

    def __call__(self, a: str, b: str) -> bool:
        """Check for the equality or synonym equality of two words.

        This is a pure lookup: querying words outside of the known dictionary
        does not add them to `word_map`.

        Arguments
        ---------
        a : str
            First word to compare. May be outside of the known dictionary.
        b : str
            Second word to compare. May be outside of the known dictionary.
            The order of arguments does not matter.

        Returns
        -------
        bool
            Whether `a` and `b` should be considered synonyms. Not transitive,
            see the main class documentation."""
        if a == b:
            return True

        # `.get` rather than indexing: `word_map` is a defaultdict, so
        # `word_map[a]` would silently insert an empty entry for every
        # unknown word that is ever looked up.
        return b in self.word_map.get(a, ())

    def get_synonyms_for(self, word: str) -> set:
        """Returns the set of synonyms for a given word.

        Arguments
        ---------
        word : str
            The word to look up the synonyms of. May be outside of the known
            dictionary.

        Returns
        -------
        set of str
            Set of known synonyms for this word. Do not mutate (or copy it
            prior). May be empty if the word has no known synonyms."""
        # For a known word this is the internal set itself (hence the
        # do-not-mutate warning); unknown words get a fresh empty set and are
        # not added to the map.
        return self.word_map.get(word, set())