python goatools使用


用david搞定了所有的GOterm后,接下来就是利用goslim处理这些term。

用的包是goatools,需要下载几个obo文件,以及fisher、pygraphviz和graphviz等几个模块。

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Annotate the GO terms stored in a JSON file with their GO slim ancestors.

Created on Fri Nov 21 20:06:42 2014

@author: hluo
"""
import json
import os
import os.path as op
import re
import sys

# Make the parent directory importable so that the goatools modules living
# there (obo_parser, mapslim) can be imported by their bare names.
sys.path.insert(0, op.join(op.dirname(__file__), ".."))

try:
    from obo_parser import GODag
    from mapslim import mapslim
except ImportError:
    # Fall back to an installed goatools package when this script is not
    # sitting inside the goatools source tree.
    from goatools.obo_parser import GODag
    from goatools.mapslim import mapslim

# Default ontology files; both can be overridden per call to mygofun().
DEFAULT_OBO_FILE = '/home/hluo/Desktop/goslim/go-basic.obo'
DEFAULT_SLIM_OBO_FILE = '/home/hluo/Desktop/goslim/goslim_generic.obo'

# Matches the run of non-whitespace characters that follows a '$' and is
# followed by a '~'.  NOTE(review): presumably this is the GO identifier
# inside a DAVID term string -- confirm against the actual input data.
_GO_TERM_RE = re.compile(r'(?<=\$)\S+(?=\~)')


# copied from find_enrichment.py
def get_goslim(term, godag, goslimdag):
    """Map one GO term onto the GO slim.

    Args:
        term: GO term identifier to look up.
        godag: the full GO DAG (a GODag instance).
        goslimdag: the GO slim DAG (a GODag instance).

    Returns:
        A two-element list ``[direct_anc, all_anc]`` as returned by
        ``mapslim``, or ``[None, None]`` when ``term`` is not in ``godag``.
    """
    if term not in godag:
        return [None, None]
    direct_anc, all_anc = mapslim(term, godag, goslimdag)
    return [direct_anc, all_anc]


def mygofun(json_file, obo_file=DEFAULT_OBO_FILE,
            slim_obo_file=DEFAULT_SLIM_OBO_FILE):
    """Add GO slim annotations to every record of ``json_file``.

    For every key of every record that starts with ``'GO'``, two new keys
    are added: ``<key>_dslim`` (direct slim ancestors) and ``<key>_aslim``
    (all slim ancestors), each a sorted list.  The annotated records are
    written to ``<json_file>.txt``; the input file is left untouched.

    Args:
        json_file: path of the JSON file to annotate; it must contain a
            list of dict records.
        obo_file: path of the full GO ontology (.obo) file.
        slim_obo_file: path of the GO slim ontology (.obo) file.

    Raises:
        IOError: if either ontology file does not exist.
    """
    # Explicit check instead of ``assert`` so it also runs under ``python -O``.
    for path in (obo_file, slim_obo_file):
        if not os.path.exists(path):
            raise IOError("file %s not found!" % path)

    # load DAGs
    go_dag = GODag(obo_file)
    goslim_dag = GODag(slim_obo_file)

    with open(json_file) as handle:
        records = json.load(handle)

    for record in records:
        # Snapshot the keys first: new keys are inserted into ``record``
        # below, and mutating a dict while iterating over it is an error.
        go_keys = [key for key in record if key.startswith('GO')]
        for key in go_keys:
            direct_slim = set()
            all_slim = set()
            for text in record[key]:
                match = _GO_TERM_RE.search(text)
                if match is None:
                    # No '$<term>~' pattern in this entry; nothing to map.
                    continue
                direct_anc, all_anc = get_goslim(
                    match.group(0), go_dag, goslim_dag)
                if direct_anc is None:
                    # Term is unknown to the full GO DAG; skip it rather
                    # than failing on ``set |= None``.
                    continue
                direct_slim.update(direct_anc)
                all_slim.update(all_anc)
            # Sorted so the output file is reproducible between runs.
            record[key + '_dslim'] = sorted(direct_slim)
            record[key + '_aslim'] = sorted(all_slim)

    with open('%s.txt' % json_file, 'w') as handle:
        json.dump(records, handle, indent=1)


if __name__ == '__main__':
    # Optional first command-line argument overrides the default input file.
    mygofun(sys.argv[1] if len(sys.argv) > 1 else 'NC_000913.gbk.json')

The script loads a JSON file, adds two new keys (`<key>_dslim` and `<key>_aslim`) to every record for each key that starts with `GO`, and writes the result to `<input file>.txt`.

P.S. `dslim` stands for "direct slim" (the direct GO slim ancestors); `aslim` stands for "all slim" (all GO slim ancestors).

Then I ran a Python batch script to process all the JSON files.

# -*- coding: utf-8 -*-
"""Run mygofun over every ``.json`` file found in one directory.

Created on Mon Nov 24 17:37:24 2014

@author: hluo
"""
import os
import re
import sys

from mygoslim import mygofun

if __name__ == '__main__':
    # Optional first command-line argument overrides the default directory.
    mydir = sys.argv[1] if len(sys.argv) > 1 else '/home/hluo/Desktop/gbk'

    # Raw string: '\.' in a plain string literal is an invalid escape.
    json_pattern = re.compile(r'\.json$')

    # os.listdir() returns names in arbitrary order; sort them so the
    # files are processed in a reproducible sequence.
    json_file_list = sorted(
        name for name in os.listdir(mydir) if json_pattern.search(name)
    )

    for name in json_file_list:
        mygofun(os.path.join(mydir, name))


The script uses the `re` and `os` modules to collect all the JSON files.

python goatools使用

评论关闭