# uniprot_config.properties - for each organism, configure where to get identifiers for genes and
# which field should be unique. Also, which features to load
#
# you can configure:
# a. which gene field must be unique
# b. where in the XML file to get the gene's identifier
#
# ~ a. UNIQUE FIELD ~
# [taxonId].uniqueField = primaryIdentifier / secondaryIdentifier
# * defines which field should be unique
#
# ~ b. IDENTIFIERS ~
# [taxonId].[identifierType].[where to get the identifier]
#
# identifierType = primaryIdentifier / secondaryIdentifier
# where to get the identifier = dbref / gene-designation / name
# * dbref
# use identifier from the dbref section of the XML. eg:
# 7227.primaryIdentifier.dbref = FlyBase
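# will use the id of the matching dbReference element, e.g. FBgn0000606 from:
# <dbReference type="FlyBase" id="FBgn0000606"/>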
# * gene designation
# use the value in the "gene designation" section
# 6239.primaryIdentifier.gene-designation = WormBase
# 6239.gene-designation = gene ID
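# will use WBGene00020142, taken from:
#
# <dbReference type="WormBase" id="...">
# <property type="gene ID" value="WBGene00020142"/>
# </dbReference>
#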
# * name
# get the identifier from the <name> section of the XML. eg:
# 7227.secondaryIdentifier.name = ORF
# will set CG2328 as the gene.secondaryIdentifier:
#
# <name type="primary">eve</name>
# <name type="ORF">CG2328</name>
#
# Fallback settings, used if we find an organism that is not configured in this file.
# Please don't remove!
default.uniqueField = primaryIdentifier
default.primaryIdentifier.dbref = Ensembl
# human (Homo sapiens, taxon 9606)
# NOTE(review): uniqueField is gene_symbol here, not one of the
# primaryIdentifier / secondaryIdentifier values listed in the header, and no
# primaryIdentifier source is configured - presumably intentional; confirm.
9606.uniqueField = gene_symbol
# gene_symbol comes from the gene name of type "primary"; the remaining
# fields come from the Ensembl, HGNC and GeneId dbrefs respectively.
9606.gene_symbol.name = primary
9606.Ensembl_identifier.dbref = Ensembl
9606.gene_identifier.dbref = HGNC
9606.NCBI_gene_number.dbref = GeneId
# yeast (Saccharomyces cerevisiae S288C, taxon 559292)
# primaryIdentifier and gene_identifier are both read from the SGD dbref.
559292.uniqueField = primaryIdentifier
559292.gene_symbol.name = primary
559292.primaryIdentifier.dbref = SGD
559292.gene_identifier.dbref = SGD
# Ensembl_identifier uses the gene-designation mechanism: the value of the
# "gene ID" designation on the EnsemblFungi entry.
559292.Ensembl_identifier.gene-designation = EnsemblFungi
559292.gene-designation = gene ID
559292.NCBI_gene_number.dbref = GeneId
# pombe (Schizosaccharomyces pombe 972h-, taxon 284812)
# primaryIdentifier and gene_identifier are both read from the GeneDB_Spombe dbref.
284812.uniqueField = primaryIdentifier
284812.gene_symbol.name = primary
284812.primaryIdentifier.dbref = GeneDB_Spombe
284812.gene_identifier.dbref = GeneDB_Spombe
# NOTE(review): every other organism in this file that uses a
# ".gene-designation" source also sets "[taxonId].gene-designation = gene ID";
# there is no "284812.gene-designation" key here. Confirm whether the loader
# has a default or whether that key is missing.
284812.Ensembl_identifier.gene-designation = EnsemblFungi
284812.NCBI_gene_number.dbref = GeneId
# Arabidopsis (Arabidopsis thaliana, taxon 3702)
# primaryIdentifier and gene_identifier are both read from the TAIR dbref.
3702.uniqueField = primaryIdentifier
3702.primaryIdentifier.dbref = TAIR
3702.gene_identifier.dbref = TAIR
3702.gene_symbol.name = primary
3702.NCBI_gene_number.dbref = GeneId
# Ensembl_identifier uses the gene-designation mechanism: the value of the
# "gene ID" designation on the EnsemblPlants entry.
3702.Ensembl_identifier.gene-designation = EnsemblPlants
3702.gene-designation = gene ID
# Cow (Bos taurus, taxon 9913)
# NOTE(review): as for human (9606), uniqueness is keyed on gene_symbol and no
# primaryIdentifier source is configured - confirm this is intended.
9913.uniqueField = gene_symbol
9913.gene_symbol.name = primary
9913.NCBI_gene_number.dbref = GeneId
9913.Ensembl_identifier.dbref = Ensembl
# Fly (Drosophila melanogaster, taxon 7227)
# primaryIdentifier and gene_identifier are both read from the FlyBase dbref.
7227.uniqueField = primaryIdentifier
7227.primaryIdentifier.dbref = FlyBase
7227.gene_identifier.dbref = FlyBase
7227.NCBI_gene_number.dbref = GeneId
# NOTE(review): gene_symbol is read from the name of type ORF (a CG number such
# as CG2328 in the header example), whereas every other organism in this file
# uses the "primary" name - confirm this is intended.
7227.gene_symbol.name = ORF
# Ensembl_identifier uses the gene-designation mechanism: the value of the
# "gene ID" designation on the EnsemblMetazoa entry.
7227.Ensembl_identifier.gene-designation = EnsemblMetazoa
7227.gene-designation = gene ID
# rat (Rattus norvegicus, taxon 10116)
# primaryIdentifier and gene_identifier are both read from the RGD dbref.
10116.uniqueField = primaryIdentifier
10116.primaryIdentifier.dbref = RGD
10116.gene_identifier.dbref = RGD
10116.gene_symbol.name = primary
10116.NCBI_gene_number.dbref = GeneId
10116.Ensembl_identifier.dbref = Ensembl
# Mouse (Mus musculus, taxon 10090)
# primaryIdentifier and gene_identifier are both read from the MGI dbref.
10090.uniqueField = primaryIdentifier
10090.primaryIdentifier.dbref = MGI
10090.gene_identifier.dbref = MGI
10090.gene_symbol.name = primary
10090.Ensembl_identifier.dbref = Ensembl
10090.NCBI_gene_number.dbref = GeneId
# Plasmodium (Plasmodium falciparum 3D7, taxon 36329)
# No organism-database dbref is used here: primaryIdentifier and
# gene_identifier are both read from the gene name of type ORF.
36329.uniqueField = primaryIdentifier
36329.primaryIdentifier.name = ORF
36329.gene_identifier.name = ORF
36329.gene_symbol.name = primary
36329.NCBI_gene_number.dbref = GeneId
# Ensembl_identifier uses the gene-designation mechanism: the value of the
# "gene ID" designation on the EnsemblProtists entry.
36329.Ensembl_identifier.gene-designation = EnsemblProtists
36329.gene-designation = gene ID
# Tetrahymena (Tetrahymena thermophila SB210, taxon 312017)
# primaryIdentifier and gene_identifier are both read from the gene name of
# type ORF; no Ensembl_identifier source is configured for this organism.
312017.uniqueField = primaryIdentifier
312017.primaryIdentifier.name = ORF
312017.gene_identifier.name = ORF
312017.gene_symbol.name = primary
312017.NCBI_gene_number.dbref = GeneId
# Neurospora (Neurospora crassa OR74A, taxon 367110)
# primaryIdentifier and gene_identifier are both read from the gene name of
# type ORF.
367110.uniqueField = primaryIdentifier
367110.primaryIdentifier.name = ORF
367110.gene_identifier.name = ORF
367110.gene_symbol.name = primary
367110.NCBI_gene_number.dbref = GeneId
# Ensembl_identifier uses the gene-designation mechanism: the value of the
# "gene ID" designation on the EnsemblFungi entry.
367110.Ensembl_identifier.gene-designation = EnsemblFungi
367110.gene-designation = gene ID
# Giardia (Giardia lamblia ATCC 50803, taxon 184922)
# primaryIdentifier and gene_identifier are both read from the gene name of
# type ORF; no Ensembl_identifier source is configured for this organism.
184922.uniqueField = primaryIdentifier
184922.primaryIdentifier.name = ORF
184922.gene_identifier.name = ORF
184922.gene_symbol.name = primary
184922.NCBI_gene_number.dbref = GeneId
# only load protein features of these types (comma-separated list)
# to load ALL feature types, comment out the feature.types line below
# NOTE(review): spacing after the commas is inconsistent ("initiator methionine, signal peptide,propeptide");
# presumably the loader trims each entry - confirm before relying on it.
feature.types = initiator methionine, signal peptide,propeptide,short sequence motif,transit peptide,chain,peptide,topological domain,transmembrane region,active site,metal ion-binding site,binding site,site,modified residue,lipid moiety-binding region,glycosylation site,splice variant,sequence variant,unsure residue,strand,turn,helix
# to load NO feature types, uncomment the line below
# feature.types = NONE
# which dbrefs to load as CrossReferences (comma-separated list of dbref types)
# the names are not case sensitive
crossReference.dbs = RefSeq, UniGene
# to load NO cross references, uncomment the line below
# crossReference.dbs = NONE