Skip to content

Commit 994b649

Browse files
committed
Add class for loading gene association file
1 parent 7d7f9f8 commit 994b649

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

flib/core/goa.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import sys
2+
import logging
3+
from collections import defaultdict
4+
import re
5+
import requests
6+
import urllib2
7+
import gzip
8+
import io
9+
10+
from onto import GeneOntology
11+
from entrez import Entrez
12+
from idmap import IDMap
13+
14+
# Module-level logger; basicConfig() installs a default handler on the root
# logger so the INFO messages emitted below are visible when run as a script.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
17+
18+
# Organism scientific name -> short name that is spliced between a GO_PREFIX
# and a GO_ASSOC_SUFFIX entry to form the association file name.
GO_NAMES = {
    'Arabidopsis thaliana': 'tair',
    'Homo sapiens': 'human',
    'Mus musculus': 'mgi',
    'Rattus norvegicus': 'rgd',
    'Danio rerio': 'zfin',
    'Drosophila melanogaster': 'fb',
    'Saccharomyces cerevisiae': 'sgd',
    'Caenorhabditis elegans': 'wb',
    'Pseudomonas aeruginosa': 'pseudocap',
}
29+
30+
# File-name prefixes and suffixes. GO_PREFIX and GO_ASSOC_SUFFIX are paired
# by position (they are zipped together), so entry i of one list belongs with
# entry i of the other; keep the lists the same length and order.
GO_PREFIX = ['goa_', 'gene_association.']
GO_ASSOC_SUFFIX = ['.gaf.gz', '.gz']
# Not referenced by the code visible in this module.
# NOTE(review): presumably the suffixes of the metadata files -- confirm.
GO_INFO_SUFFIX = ['.gaf.json', '.json']
33+
34+
# Full GO namespace name -> two-letter abbreviation.
# Not referenced by the code visible in this module.
GO_NAMESPACE_MAP = {
    'biological_process': 'BP',
    'molecular_function': 'MF',
    'cellular_component': 'CC',
}
39+
40+
# Base URL the association file name is appended to (note trailing slash).
GO_ASSOC_URL = 'http://www.geneontology.org/gene-associations/'
# Not referenced by the code visible in this module.
# NOTE(review): presumably the key holding the release date in the
# association metadata file -- confirm against the consumer.
GO_VERSION_KEY = 'submissionDate'
42+
43+
class GOA:
    '''Gene ontology associations for a single organism.

    Locates the organism's gene association file under GO_ASSOC_URL and
    feeds it to a GeneOntology object.
    '''

    def __init__(self, org='Homo sapiens'):
        '''Create a loader for *org*.

        org -- organism scientific name; must be a key of GO_NAMES by the
               time a load method is called (KeyError otherwise).
        '''
        self._onto = None
        self._data = None
        self._org = org

    def _find_annotation_url(self):
        '''Return the first reachable association file URL, or None.

        Candidate URLs are built from each positional (prefix, suffix) pair
        of GO_PREFIX / GO_ASSOC_SUFFIX and probed with an HTTP HEAD request;
        any status code below 400 counts as reachable.

        Raises KeyError if the organism is not listed in GO_NAMES.
        '''
        org_name = GO_NAMES[self._org]
        for prefix, suffix in zip(GO_PREFIX, GO_ASSOC_SUFFIX):
            annot_zip = GO_ASSOC_URL + ''.join((prefix, org_name, suffix))
            ret = requests.head(annot_zip)
            if ret.status_code < 400:
                return annot_zip
            logger.info('URL not available: %s', annot_zip)
        return None

    def load_onto(self, onto=None, idmap=None):
        '''Populate an ontology with this organism's annotations.

        onto  -- ontology object to populate; when falsy a new one is
                 created with GeneOntology.generate().
        idmap -- optional identifier map; when given it is passed to
                 onto.map_genes(idmap, xdb_prefixed=True) after loading.

        Returns the ontology, which is also kept on the instance. If no
        association file is reachable the ontology is returned without
        annotations and a warning is logged.
        '''
        if not onto:
            onto = GeneOntology.generate()

        annot_zip = self._find_annotation_url()
        if annot_zip:
            logger.info('Loading: %s', annot_zip)
            onto.populate_annotations(
                annot_zip,
                remote_location=True)
        else:
            # Previously this case passed silently; surface it so an empty
            # result can be traced back to the missing download.
            logger.warning(
                'No gene association file available for: %s', self._org)

        if idmap:
            onto.map_genes(idmap, xdb_prefixed=True)

        self._onto = onto
        return onto

    def load_data(self):
        '''Load the organism's annotations into the instance's ontology.

        The earlier body referenced undefined names (prefix, suffix,
        gene_ontology) and could only raise NameError. It now resolves the
        URL the same way load_onto does and populates the ontology held by
        this instance, creating one if none has been loaded yet.
        NOTE(review): the intended target object is an assumption -- confirm.

        Returns True when an association file was found and loaded, False
        when none was reachable.
        '''
        annot_zip = self._find_annotation_url()
        if not annot_zip:
            return False

        if self._onto is None:
            self._onto = GeneOntology.generate()

        logger.info('Loading: %s', annot_zip)
        self._onto.populate_annotations(
            annot_zip,
            remote_location=True)

        return True
83+
84+
if __name__ == '__main__':
    # Script entry point: load the Entrez identifier maps, build the default
    # (Homo sapiens) ontology with annotations mapped through the xref map,
    # and write the result to 'go.gmt'.
    entrez_map = Entrez()
    entrez_map.load()

    goa = GOA()
    onto = goa.load_onto(idmap=entrez_map.get_xref_map())
    onto.print_to_gmt_file('go.gmt')

0 commit comments

Comments
 (0)