forked from aboutcode-org/commoncode
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfileset.py
More file actions
193 lines (160 loc) · 6.33 KB
/
fileset.py
File metadata and controls
193 lines (160 loc) · 6.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/commoncode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import fnmatch
import os
from commoncode import fileutils
from commoncode import paths
# Flip to True to emit this module's debug messages on stdout.
TRACE = False

if TRACE:
    import logging
    import sys

    # Send DEBUG-level records to stdout, then create and open up the
    # module-level logger used by the tracing calls below.
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
"""
Match files and directories paths based on inclusion and exclusion glob-style
patterns.
For example, this can be used to skip files that match ignore patterns,
similar to version control ignore files such as .gitignore.
The pattern syntax is the same as fnmatch(5) as implemented in Python.
Patterns are applied to a path this way:
- Paths are converted to POSIX paths before matching.
- Patterns are NOT case-sensitive.
- Leading slashes are ignored.
- If the pattern contains a /, then the whole path must be matched;
otherwise, the pattern matches if any path segment matches.
- When matched, a directory content is matched recursively.
For instance, when using patterns for ignoring, a matched directory will
be ignored with its file and sub-directories at full depth.
- The order of patterns does not matter, except for exclusions vs. inclusions.
- Exclusion patterns are prefixed with an exclamation mark (bang or !)
meaning that paths matched by that pattern will be excluded. Exclusions
have precedence over inclusions.
- Patterns starting with # are comments and skipped. Use [#] for a literal #.
- To match paths relative to some root path, you must design your patterns
and the paths to be tested accordingly. This module does not handle this.
Patterns may include glob wildcards such as:
- ? : matches any single character.
- * : matches 0 or more characters.
- [seq] : matches any character in seq.
- [!seq] : matches any character not in seq.
For a literal match, wrap the meta-characters in brackets. For example, '[?]'
matches the character '?'.
"""
def is_included(path, includes=None, excludes=None):
    """
    Tell whether `path` passes the `includes` and `excludes` glob filters.

    `includes` and `excludes` are mappings of {fnmatch pattern: message}.
    Entries with an empty pattern are discarded before matching.

    Return False for an empty or whitespace-only `path`. Return True when
    neither mapping is provided. Otherwise `includes` (if any) are checked
    first and the path must match one of them; then `excludes` (if any) are
    checked and the path must match none of them.

    The ordering of the items inside each mapping does not matter.
    """
    # A blank path is never part of the set.
    if not (path and path.strip()):
        return False

    # Nothing to filter with: everything is included.
    if not (includes or excludes):
        return True

    # Keep only the entries that have a non-empty pattern.
    include_map = {pat: msg for pat, msg in (includes or {}).items() if pat}
    exclude_map = {pat: msg for pat, msg in (excludes or {}).items() if pat}

    if include_map:
        included = get_matches(path, include_map, all_matches=False)
        if TRACE:
            logger.debug(
                'in_fileset: path: %(path)r included:%(included)r'
                % {'path': path, 'included': included}
            )
        if not included:
            return False

    if exclude_map:
        excluded = get_matches(path, exclude_map, all_matches=False)
        if TRACE:
            logger.debug(
                'in_fileset: path: %(path)r excluded:%(excluded)r .'
                % {'path': path, 'excluded': excluded}
            )
        # Exclusions win over inclusions.
        return not excluded

    return True
def get_matches(path, patterns, all_matches=False):
    """
    Return the value(s) of the `patterns` entries that match `path`.

    `patterns` is a mapping of {pattern: value or message}. A list or tuple
    of pattern strings is also accepted, in which case each pattern is used
    as its own value. Any other type fails an assertion.

    Matching rules:
    - The path is converted to a POSIX path and lowercased; patterns are
      lowercased too, so matching is not case-sensitive.
    - Leading slashes are ignored on both the path and the patterns.
    - A pattern without a "/" matches if any path segment matches it.
    - A pattern with a "/" must match the whole path.
    - Empty or whitespace-only patterns are skipped.

    Return value:
    - False if `path` or `patterns` is empty, or `path` is made only of
      slashes, regardless of `all_matches`.
    - If `all_matches` is False, stop on the first matched pattern and return
      its value, or False if no pattern matched.
    - If `all_matches` is True, return a list of the values of every matched
      pattern, possibly empty.

    NOTE: a matched pattern whose value is empty or None contributes an empty
    string, so with `all_matches=False` such a match yields a falsy result.
    """
    if not path or not patterns:
        return False

    path = fileutils.as_posixpath(path).lower()
    # Strip leading slashes only. The characters passed to lstrip() are a set
    # of characters to remove, so anything besides "/" here would also eat
    # legitimate leading characters of the first path segment.
    pathstripped = path.lstrip('/')
    if not pathstripped:
        return False

    # presumably the list of path segments -- see commoncode.paths.split
    segments = paths.split(pathstripped)

    if TRACE:
        logger.debug('_match: path: %(path)r patterns:%(patterns)r.' % locals())

    matches = []
    if not isinstance(patterns, dict):
        assert isinstance(patterns, (list, tuple)), 'Invalid patterns: {}'.format(patterns)
        patterns = {p: p for p in patterns}

    for pat, value in patterns.items():
        if not pat or not pat.strip():
            continue

        value = value or ''
        pat = pat.lstrip('/').lower()

        if '/' in pat:
            # A pattern with a slash must match the whole path, with or
            # without its leading slashes.
            matched = (
                fnmatch.fnmatchcase(path, pat)
                or fnmatch.fnmatchcase(pathstripped, pat)
            )
        else:
            # A plain pattern matches if any single segment matches.
            matched = any(fnmatch.fnmatchcase(s, pat) for s in segments)

        if matched:
            matches.append(value)
            if not all_matches:
                break

    if TRACE:
        logger.debug('_match: matches: %(matches)r' % locals())

    if not all_matches:
        if matches:
            return matches[0]
        else:
            return False
    return matches
def load(location):
    """
    Read patterns from the text file at `location`, one pattern per line.

    Return an empty tuple if `location` is empty. Otherwise return a list of
    the non-blank lines of the file, each stripped of surrounding whitespace.
    A leading "~" in `location` is expanded to the user home directory.

    Fail an assertion if `location` does not exist or is not a file.
    """
    if not location:
        return tuple()

    expanded = os.path.expanduser(location)
    fn = os.path.abspath(os.path.normpath(expanded))

    msg = 'File %(location)s does not exist or not a file.' % {'location': location}
    assert os.path.exists(fn) and os.path.isfile(fn), msg

    loaded = []
    with open(fn, 'r') as f:
        for line in f:
            stripped = line.strip()
            # Skip blank and whitespace-only lines.
            if stripped:
                loaded.append(stripped)
    return loaded
def includes_excludes(patterns, message):
    """
    Split a sequence of `patterns` strings into inclusions and exclusions.

    Return a two-tuple of (included, excluded) dicts of {pattern: message}.
    `message` is used as the value of every entry; an empty string is used
    if `message` is None or empty.

    Each pattern is stripped of surrounding whitespace, then:
    - empty patterns and patterns starting with "#" (comments) are skipped;
    - patterns starting with "!" are exclusions: all the leading "!" are
      removed and the remainder, if not empty, is added to the excluded dict;
    - any other pattern is added to the included dict.

    Return two empty dicts if `patterns` is empty or None.
    """
    message = message or ''
    BANG = '!'
    POUND = '#'

    included = {}
    excluded = {}
    if not patterns:
        return included, excluded

    for pat in patterns:
        pat = pat.strip()
        if not pat or pat.startswith(POUND):
            continue

        if pat.startswith(BANG):
            cpat = pat.lstrip(BANG)
            # A pattern made only of bangs excludes nothing.
            if cpat:
                excluded[cpat] = message
        else:
            included[pat] = message

    return included, excluded