cleanUrl: "pybiomart-usage"
description: "pyBiomart 패키지의 사용법을 정리합니다."

pybiomart

Documentation

PyBiomart - pybiomart 0.2.0 documentation

Install

# Install pybiomart from the bioconda channel.
conda install -c bioconda pybiomart

Quickstart

from pybiomart import Dataset

# The host must be a plain URL; the angle brackets that were here are
# Markdown autolink residue and are not part of the address.
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')

# Fetch the Ensembl gene ID and gene symbol for every gene in the dataset.
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name'])

# Server has to be imported explicitly; the Dataset import alone does not
# bring it into scope.
from pybiomart import Server

# Plain URL for the host (no Markdown autolink angle brackets).
server = Server(host='http://www.ensembl.org')
server.list_marts()

# Select a mart by name and list the datasets it offers.
mart = server['ENSEMBL_MART_ENSEMBL']
mart.list_datasets()

# Select a dataset from the mart by name.
dataset = mart['hsapiens_gene_ensembl']

# List the attributes available in the dataset.
dataset.list_attributes()

Mapping from ensg to symbol (Protein-coding gene only)

from pybiomart import Dataset

# Plain URL for the host (no Markdown autolink angle brackets).
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'])

# Keep only the protein-coding rows, then map Ensembl gene ID -> gene symbol.
# The result columns are addressed by their display names.
protein_coding = res[res['Gene type'] == 'protein_coding']
ensg2symbol = dict(zip(protein_coding['Gene stable ID'], protein_coding['Gene name']))

Mapping from ensg to symbol

from pybiomart import Dataset

# Plain URL for the host (no Markdown autolink angle brackets).
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'])

# Map Ensembl gene ID -> gene symbol for every gene, regardless of biotype.
ensg2symbol = dict(zip(res['Gene stable ID'], res['Gene name']))

Mapping from ensg to symbol (mouse)

from pybiomart import Dataset

# Same query as for human, but against the mouse gene dataset.
# Plain URL for the host (no Markdown autolink angle brackets).
dataset = Dataset(name='mmusculus_gene_ensembl', host='http://www.ensembl.org')
res = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'])

# Map Ensembl gene ID -> gene symbol for every gene, regardless of biotype.
ensg2symbol = dict(zip(res['Gene stable ID'], res['Gene name']))

Entrez ID to symbol

# Imported here so the snippet runs on its own, like the other examples.
from pybiomart import Dataset

# Plain URL for the host (no Markdown autolink angle brackets).
dataset = Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org')

# Drop rows where either the gene symbol or the Entrez ID is missing.
res = dataset.query(attributes=['external_gene_name', 'entrezgene_id']).dropna()

# Map Entrez ID -> gene symbol. Each ID goes through int() before str() so
# that a float-valued ID such as 7157.0 yields the key '7157'.
entrez2symbol = {
    str(int(entrez_id)): symbol
    for entrez_id, symbol in zip(res['NCBI gene (formerly Entrezgene) ID'], res['Gene name'])
}