cleanUrl: "pybiomart-usage"
description: "pyBiomart 패키지의 사용법을 정리합니다."
PyBiomart - pybiomart 0.2.0 documentation
conda install -c bioconda pybiomart
from pybiomart import Dataset

# Connect to the human gene dataset on the Ensembl BioMart server.
# The host must be a plain URL: wrapping it in angle brackets (a Markdown
# auto-link artifact) yields an invalid server address.
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')

# Request the Ensembl gene ID and the gene symbol for the dataset's genes.
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name'])
from pybiomart import Server

# Connect to the Ensembl BioMart server (plain URL, no angle brackets).
server = Server(host='http://www.ensembl.org')

# List the marts hosted on the server.
server.list_marts()

# Select the Ensembl genes mart and list the datasets it contains.
mart = server['ENSEMBL_MART_ENSEMBL']
mart.list_datasets()

# Select the human gene dataset and list the attributes it offers.
dataset = mart['hsapiens_gene_ensembl']
dataset.list_attributes()
from pybiomart import Dataset

# Human gene dataset on the Ensembl BioMart server (plain URL, no angle brackets).
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'])

# The result columns are addressed by their display names
# ('Gene stable ID', 'Gene name', 'Gene type'), not by the attribute names
# passed to query().
# Keep only protein-coding genes, then map Ensembl gene ID -> gene symbol.
protein_coding = res[res['Gene type'] == 'protein_coding']
ensg2symbol = dict(zip(protein_coding['Gene stable ID'], protein_coding['Gene name']))
from pybiomart import Dataset

# Human gene dataset on the Ensembl BioMart server (plain URL, no angle brackets).
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'])

# Map Ensembl gene ID -> gene symbol for every gene, regardless of biotype.
# The result columns are addressed by their display names, not by the
# attribute names passed to query().
ensg2symbol = dict(zip(res['Gene stable ID'], res['Gene name']))
from pybiomart import Dataset

# Mouse gene dataset on the Ensembl BioMart server (plain URL, no angle brackets).
dataset = Dataset(name='mmusculus_gene_ensembl', host='http://www.ensembl.org')
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'])

# Map Ensembl gene ID -> gene symbol for every mouse gene.
# The result columns are addressed by their display names, not by the
# attribute names passed to query().
ensg2symbol = dict(zip(res['Gene stable ID'], res['Gene name']))
from pybiomart import Dataset

# Human gene dataset on the Ensembl BioMart server (plain URL, no angle brackets).
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')

# Drop rows where the gene name or the Entrez ID is missing, so every
# remaining row can be converted to an integer ID.
res = dataset.query(attributes=['external_gene_name', 'entrezgene_id']).dropna()

# Map Entrez gene ID (as a string) -> gene symbol.
# The ID goes through int() before str() so the key has no decimal part
# (presumably the column is parsed as float when it contains missing values).
entrez2symbol = {
    str(int(entrez_id)): symbol
    for symbol, entrez_id in zip(res['Gene name'], res['NCBI gene (formerly Entrezgene) ID'])
}