This section describes the format of each table in
autoSql format.
Affymetrix 10K SNP Details
table affy10KDetails
"Information representing the Affymetrix 10K Mapping Array"
(
string affyId; "Affymetrix SNP id"
string rsId; "RS identifier (some are null)"
string tscId; "TSC identifier (some are null)"
char[2] baseA; "The first allele (A)"
char[2] baseB; "The second allele (B)"
char[34] sequenceA; "The A allele with flanking sequence"
char[34] sequenceB; "The B allele with flanking sequence"
char[8] enzyme; "The enzyme that was used to prepare the sample (HindIII
or XbaI)"
)
Affymetrix 120K SNP Array Information
table affyGenoDetails
"Information representing the Affymetrix 120K SNP array"
(
uint affyId; "Affymetrix SNP id"
uint rsId; "RS identifier (some are null)"
char[2] baseA; "The first allele (A)"
char[2] baseB; "The second allele (B)"
char[34] sequenceA; "The A allele with flanking sequence"
char[34] sequenceB; "The B allele with flanking sequence"
char[8] enzyme; "The enzyme that was used to prepare the sample (HindIII
or XbaI)"
float minFreq; "The minimum allele frequency"
float hetzyg; "The heterozygosity from all observations"
float avHetSE; "The Standard Error for the average heterozygosity (not
used)"
char[2] NA04477; "Individual 01"
char[2] NA04479; "Individual 02"
char[2] NA04846; "Individual 03"
char[2] NA11036; "Individual 04"
char[2] NA11038; "Individual 05"
char[2] NA13056; "Individual 06"
char[2] NA17011; "Individual 07"
char[2] NA17012; "Individual 08"
char[2] NA17013; "Individual 09"
char[2] NA17014; "Individual 10"
char[2] NA17015; "Individual 11"
char[2] NA17016; "Individual 12"
char[2] NA17101; "Individual 13"
char[2] NA17102; "Individual 14"
char[2] NA17103; "Individual 15"
char[2] NA17104; "Individual 16"
char[2] NA17105; "Individual 17"
char[2] NA17106; "Individual 18"
char[2] NA17201; "Individual 19"
char[2] NA17202; "Individual 20"
char[2] NA17203; "Individual 21"
char[2] NA17204; "Individual 22"
char[2] NA17205; "Individual 23"
char[2] NA17206; "Individual 24"
char[2] NA17207; "Individual 25"
char[2] NA17208; "Individual 26"
char[2] NA17210; "Individual 27"
char[2] NA17211; "Individual 28"
char[2] NA17212; "Individual 29"
char[2] NA17213; "Individual 30"
char[2] PD01; "Individual 31"
char[2] PD02; "Individual 32"
char[2] PD03; "Individual 33"
char[2] PD04; "Individual 34"
char[2] PD05; "Individual 35"
char[2] PD06; "Individual 36"
char[2] PD07; "Individual 37"
char[2] PD08; "Individual 38"
char[2] PD09; "Individual 39"
char[2] PD10; "Individual 40"
char[2] PD11; "Individual 41"
char[2] PD12; "Individual 42"
char[2] PD13; "Individual 43"
char[2] PD14; "Individual 44"
char[2] PD15; "Individual 45"
char[2] PD16; "Individual 46"
char[2] PD17; "Individual 47"
char[2] PD18; "Individual 48"
char[2] PD19; "Individual 49"
char[2] PD20; "Individual 50"
char[2] PD21; "Individual 51"
char[2] PD22; "Individual 52"
char[2] PD23; "Individual 53"
char[2] PD24; "Individual 54"
)
Affymetrix Transcriptome
table affyTranscriptome
"Describes (x,y) pairs of samples"
(
ushort bin; "A field to speed indexing"
string chrom; "Chromosome alignment is on"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Unique index given to each block of samples"
uint score; "Overall block score from 0 to 1000"
char[1] strand; "Strand: + or -"
uint sampleCount; "Number of samples in this block"
uint[sampleCount] samplePosition; "Base position of each sample in this
block, relative to chromStart"
uint[sampleCount] sampleHeight; "Score for each sample (y-value) from 0 to
1000"
)
Alternative Splicing
table altGraphX
"An alternatively-spliced gene graph"
(
string tName; "Name of target sequence, often a chrom"
int tStart; "First bac touched by graph"
int tEnd; "Start position in first bac"
string name; "Human readable name"
uint id; "Unique ID"
char[2] strand; "+ or - strand"
uint vertexCount; "Number of vertices in graph"
ubyte[vertexCount] vTypes; "Type for each vertex"
int[vertexCount] vPositions; "Position in target for each vertex"
uint edgeCount; "Number of edges in graph"
int[edgeCount] edgeStarts; "Array with start vertex of edges"
int[edgeCount] edgeEnds; "Array with end vertex of edges"
table evidence[edgeCount] evidence; "Array of evidence tables containing references
to mRNAs that support a particular edge"
int[edgeCount] edgeTypes; "Type for each edge, ggExon, ggIntron, etc"
int mrnaRefCount; "Number of supporting mRNAs"
string[mrnaRefCount] mrnaRefs; "Ids of mrnas supporting this"
int[mrnaRefCount] mrnaTissues; "Ids of tissues that mrnas come from, indexes
into tissue table"
int[mrnaRefCount] mrnaLibs; "Ids of libraries that mrnas come from, indexes
into library table"
)
Atlas Oncology Site
table atlasOncoGene
"Table used to link into ATLAS Oncology site"
(
string locusSymbol; "LocusLink Symbol"
string atlasGene; "ATLAS Gene"
string otherGene; "Other gene"
string url; "URL for corresponding ATLAS web page"
)
Bactig Positions
table bactigPos
"Bactig positions in chromosome coordinates (bed 4 +)."
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Bactig"
string startContig; "First contig in this bactig"
string endContig; "Last contig in this bactig"
)
BGI (Beijing Genomics Institute) SNPs
table bgiSnp
"Beijing Genomics Institute SNP information as bed 4 +"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "BGI SNP name: snp.superctg.ctg.pos.type.strainID"
char[1] snpType; "S (substitution), I (insertion), or D (deletion)"
uint readStart; "Start position in alternate allele read"
uint readEnd; "End position in alternate allele read"
uint qualChr; "Quality score in reference assembly"
uint qualReads; "Quality score in alternate allele read"
string snpSeq; "'X->Y' or indel sequence"
string readName; "Name of alternate allele read"
char[1] readDir; "Direction of read relative to reference"
char[4] inBroiler; "SNP found in Broiler strain? yes, no or n/a if not covered"
char[4] inLayer; "SNP found in Layer strain? yes, no or n/a if not covered"
char[4] inSilkie; "SNP found in Silkie strain? yes, no or n/a if not covered"
string primerL; "Left primer sequence"
string primerR; "Right primer sequence"
char[1] questionM; "L for dubious indels, H for other indels and SNPs"
string extra; "Additional information"
)
Bio Cyc Pathway Map
table bioCycMapDesc
"Description of BioCyc pathway maps"
(
string mapID; "BioCyc pathway map"
string description; "BioCyc pathway map description"
)
Bio Cyc Pathway
table bioCycPathway
"BioCyc Pathway to Known Gene cross reference"
(
string kgID; "Known Gene ID"
string geneID; "Gene (RefSeq) ID"
string mapID; "BioCyc pathway map ID"
)
Blast Output
The following definition is used for several tables that contain blast
output for various organisms.
table blastTab
"Tab-delimited blast output file"
(
string query; "Name of query sequence"
string target; "Name of target sequence"
float identity; "Percent identity"
uint aliLength; "Length of alignment"
uint mismatch; "Number of mismatches"
uint gapOpen; "Number of gap openings"
uint qStart; "Start in query (0 based)"
uint qEnd; "End in query (non-inclusive)"
uint tStart; "Start in target (0 based)"
uint tEnd; "End in target (non-inclusive)"
double eValue; "Expectation value"
double bitScore; "Bit score"
)
Browser Extensible Data (BED)
The following definition is used for several tables, which may use 4 or more of
the fields.
table bed
"Browser extensible data"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0-1000"
char[1] strand; "+ or -"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Always zero for now"
int blockCount; "Number of blocks (a block contains no gaps)"
int[blockCount] blockSizes; "Comma-separated list of block sizes"
int[blockCount] blockStarts; "Start positions relative to chromStart."
int expCount; "Number of experiments."
int[expCount] expIds; "Experiment IDs."
int[expCount] expScores; "Experiment scores."
)
Celera Coverage
table celeraCoverage
"Summary of large genomic Duplications from Celera Data"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Source of information"
)
Celera Duplications
table celeraDupPositive
"Summary of large genomic Duplications from Celera Data"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Celera accession name"
string fullname; "Celera accession full name"
float fracMatch; "Fraction of matching bases"
float bpalign; "Base pair alignment score"
)
Celera Overlay
table celeraOverlay
"Celera assembly overlay in the public (NCBI) assembly"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Other chromosome and start"
string otherChrom; "Other chromosome"
uint otherStart; "Start in other chromosome"
uint otherEnd; "End in other chromosome"
char orient; "Strand/orientation (F or R)"
char field1; "? (always M)"
char field2; "? (r or u)"
string localId; "?"
string parentInfo; "?"
string refId; "Reference ID"
uint refStart; "Start position in reference"
uint refLength; "Length in reference"
byte refOrient; "Strand/orientation in reference (-1 or 1)"
string queryId; "Query ID"
uint queryStart; "Start position in query"
uint queryLength; "Length in query"
byte queryOrient; "Strand/orientation in query (-1 or 1)"
)
CGAP Alias
table cgapAlias
"Associates CGAP pathway IDs with gene symbols or mRNA accessions"
(
string cgapID; "CGAP pathway ID"
string alias; "Gene symbol or mRNA"
)
CGAP/BioCarta Pathway Descriptions
table cgapBiocDesc
"CGAP/BioCarta pathway description"
(
string mapID; "CGAP/BioCarta pathway map ID"
string description; "Description"
)
CGAP/BioCarta Pathway Cross-Reference
table cgapBiocPathway
"CGAP BioCarta pathway cross reference"
(
string cgapID; "CGAP pathway ID"
string mapID; "BioCarta pathway ID"
)
Chain Alignments
table chain
"Summary info about a chain of alignments"
(
double score; "Score of chain"
string tName; "Target sequence name"
uint tSize; "Target sequence size"
uint tStart; "Alignment start position in target"
uint tEnd; "Alignment end position in target"
string qName; "Query sequence name"
uint qSize; "Query sequence size"
char qStrand; "Query strand"
uint qStart; "Alignment start position in query"
uint qEnd; "Alignment end position in query"
uint id; "Chain ID"
)
Chain Link
table chainLink
"Alignment block in chain"
(
string tName; "Target sequence name"
uint tStart; "Alignment start position in target"
uint tEnd; "Alignment end position in target"
uint qStart; "Start in query"
uint chainId; "Chain ID in chain table"
)
Chimp/Human Simple Differences
table chimpSimpleDiff
"Simple differences between the chimp and human genome assemblies"
(
string chrom; "Chromosome number"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string tseq; "Target sequence (A,T,C,G)"
string qseq; "Query sequence (A,T,C,G)"
)
Chromosome Band Information
table cytoBand
"Describes the positions of cytogenetic bands within a chromosome"
(
string chrom; "Chromosome number"
uint chromStart; "Start position in genoSeq"
uint chromEnd; "End position in genoSeq"
string name; "Name of cytogenetic band"
string gieStain; "Giemsa stain results"
)
Chromosome Information
table chromInfo
"Chromosome names and sizes"
(
string chrom; "Chromosome name"
uint size; "Chromosome size"
string fileName; "Chromosome file (raw one byte per base)"
)
Clone Fragment Positions
table chrN_gl
"Fragment positions in golden path"
(
ushort bin; "A field to speed indexing added to many
tables August 2001"
uint matches; "Number of bases that match that aren't repeats"
uint misMatches; "Number of bases that don't match"
string frag; "Fragment name"
uint start; "Start position in golden path"
uint end; "End position in golden path"
char[1] strand; "+ or - for strand"
)
Clone Information
table clonePos
"A clone's position and other info."
(
string name; "Name of clone including version"
uint seqSize; "base count not including gaps"
ubyte phase; "htg phase"
string chrom; "Chromosome name"
uint chromStart; "Start in chromosome"
uint chromEnd; "End in chromosome"
char[1] stage; "F/D/P for finished/draft/predraft"
string faFile; "File with sequence."
)
Contamination Information
table contamination
"Contamination in assembly (bed 4 +)"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name (type of contamination: E.coli, vector etc)"
string acc; "NCBI accession where contamination was found"
string ctg; "Alternate name (e.g. WGS contig name)"
int len; "Length of contaminated portion of sequence"
)
Contig/Accession Map
table contigAcc
"Maps a contig to its accession."
(
string contig; "Contig name"
string acc; "GenBank accession"
)
Contig Information
table ctgPos
"Where a contig is inside of a chromosome."
(
string contig; "Name of contig"
uint size; "Size of contig"
string chrom; "Chromosome name"
uint chromStart; "Start in chromosome"
uint chromEnd; "End in chromosome"
)
Contig Information (Expanded)
table ctgPos2
"Where a contig is inside of a chromosome including contig type information."
(
string contig; "Name of contig"
uint size; "Size of contig"
string chrom; "Chromosome name"
uint chromStart; "Start in chromosome"
uint chromEnd; "End in chromosome"
char[1] type; "(W)GS contig, (F)inished, (P)redraft, (D)raft, (O)ther"
)
CpG Islands
table cpgIsland
"Describes CpG Islands of 50 bases or more that are at least 50% G/C"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "CpG Island"
uint minLength; "Minimum Island Length"
uint cpgNum; "Number of CpGs in island"
uint gcNum; "Number of C and G in island"
uint perCpg; "Percentage of island that is CpG"
uint perGc; "Percentage of island that is C or G"
)
CpG Islands (Extended)
table cpgIslandExt
"Describes the CpG Islands (includes observed/expected ratio)"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "CpG Island"
uint length; "Island Length"
uint cpgNum; "Number of CpGs in island"
uint gcNum; "Number of C and G in island"
uint perCpg; "Percentage of island that is CpG"
uint perGc; "Percentage of island that is C or G"
float obsExp; "Ratio of observed(cpgNum) to expected(numC*numG/length)
CpG in island"
)
dbSnpRS
table dbSnpRS
"Information from dbSNP at the reference SNP level"
(
uint rsId; "RS identifier"
float avHet; "Average heterozygosity from all observations"
float avHetSE; "Standard Error for the average heterozygosity from all
observations"
string valid; "Validation status of the SNP"
char[1] base1; "Base of the first allele"
char[1] base2; "Base of the second allele"
string assembly; "Sequence in the UCSC assembly"
string alternate; "Sequence of the alternate allele"
)
DNA Motif
table dnaMotif
"A gapless DNA motif"
(
string name; "Motif name"
int columnCount; "Count of columns in motif"
float[columnCount] aProb; "Probability of A's in each column"
float[columnCount] cProb; "Probability of C's in each column"
float[columnCount] gProb; "Probability of G's in each column"
float[columnCount] tProb; "Probability of T's in each column"
)
DupSpMrna
table dupSpMrna
"Duplicate mRNA/Protein entries that have identical CDS structures"
(
string mrnaID; "mRNA ID name"
string proteinID; "Protein ID from UniProtKB"
string dupMrnaID; "mRNA ID of duplicated entry"
string dupProteinId; "Protein ID of duplicated entry"
)
EncodeRegionInfo
table encodeRegionInfo
"Descriptive, assembly-independent information about ENCODE regions"
(
string name; "Name of region"
string descr; "Description (gene region, random pick, etc.)"
)
Ensembl Gene XRef
table ensGtp
"Associates Ensembl gene, transcript, and protein IDs"
(
string gene; "Gene ID"
string transcript; "Transcript ID"
string protein; "Protein ID"
)
EnsemblXref
table ensemblXref
"Cross-reference info between transcript entries and translation entries of Ensembl
genes"
(
string db; "Ensembl Database"
int gene_id; "Gene ID number"
string gene_name; "Gene name in database"
int transcript_id; "Transcript ID number"
string transcript_name; "Transcript name in database"
uint translation_id; "Translation ID number"
string translation_name; "Translation name in database"
string external_db; "External database"
string external_name; "External status"
)
EnsemblXref2
table ensemblXref2
"Cross-reference info between transcript entries and translation entries of Ensembl
genes (alternative table used in some assemblies)"
(
string transcript_name; "Transcript name"
string translation_name; "Translation name"
)
EnsemblXref3
table ensemblXref3
"A cross-reference table for Ensembl Genes."
(
string gene; "Ensembl gene ID"
string geneVer; "Ensembl gene ID version number"
string transcript; "Ensembl transcript ID"
string transcriptVer; "Ensembl transcript ID version number"
string protein; "Ensembl protein ID"
string proteinVer; "Ensembl protein version number"
string tremblAcc; "UniProtKB protein accession number"
string swissDisplayId; "UniProtKB protein display ID"
string swissAcc; "UniProtKB protein accession number"
)
Ensembl gene_xref Table
Ensembl gene_xref table downloaded directly from the Ensembl ftp site (18.34
release 11/04). Refer to the Ensembl site for details. CAUTION: Ensembl sometimes
changes its table definitions and some fields may not contain the data that the
name indicates, e.g. translation_name.
table ensGeneXref
"Table gene_xref downloaded from Ensembl"
(
string db; "#"
string analysis; "#"
string type; "#"
int gene_id; "#"
string gene_name; "#"
int[5] gene_version; "#"
int transcript_id; "#"
string transcript_name; "#"
int[5] transcript_version; "#"
int[5] translation_name; "#"
int translation_id; "#"
int[5] translation_version; "#"
string external_db; "#"
string external_name; "#"
char[10] external_status; "#"
)
Ensembl transcript Table
Ensembl transcript table downloaded directly from the Ensembl ftp site (18.34
release 11/04). Refer to the Ensembl site for details. CAUTION: Ensembl
sometimes changes its table definitions and some fields may not contain the data
that the name indicates.
table ensTranscript
"Table transcript downloaded from Ensembl"
(
int id; "#"
string db; "#"
string analysis; "#"
string type; "#"
int transcript_id; "#"
string transcript_name; "#"
int transcript_version; "#"
string chr_name; "#"
int chr_start; "#"
int chr_end; "#"
int chr_strand; "#"
int coding_start; "#"
int coding_end; "#"
int translation_id; "#"
string translation_name; "#"
string translation_version; "#"
int gene_id; "#"
string gene_name; "#"
int gene_version; "#"
lstring exon_structure; "#"
lstring exon_ids; "#"
string external_db; "#"
string external_name; "#"
char[10] external_status; "#"
)
Exofish Information
table exoFish
"An evolutionarily conserved region (ecore) with Tetraodon"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Just a dummy dot"
uint score; "Score from 0 to 1000"
)
Exonerate Mouse Homologies
Last used Apr. 2001.
table exoMouse
"A rough alignment - not detailed"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of other sequence"
uint score; "Score from 0 to 1000"
char[1] strand; "+ or -"
uint otherStart; "Start in other sequence"
uint otherEnd; "End in other sequence"
)
Experiment Record
table expRecord
"Minimal descriptive data for an experiment in the browser"
(
uint id; "Internal id of experiment"
string name; "Name of experiment"
lstring description; "Description of experiment"
lstring url; "URL relevant to experiment"
lstring ref; "Reference for experiment"
lstring credit; "Who to credit with experiment"
uint numExtras; "Number of extra things"
lstring[numExtras] extras; "Extra things of interest, i.e. classifications"
)
Expression Data
table expData
"Expression data (no mapping, just spots)"
(
string name; "Name of gene/target/probe etc."
uint expCount; "Number of scores"
float[expCount] expScores; "Scores. May be absolute or relative ratio"
)
Expression Distance Between 2 Genes
table expDistance
"Distance between two genes in expression space"
(
string query; "Name of one gene"
string target; "Name of other gene"
float distance; "Distance in expression space, always >= 0"
)
FISH Clones
table fishClones
"Describes the positions of fishClones in the assembly"
(
string chrom; "Chromosome number"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of clone"
uint score; "Always 1000"
uint placeCount; "Number of times FISH'd"
string[placeCount] bandStarts; "Start FISH band"
string[placeCount] bandEnds; "End FISH band"
string[placeCount] labs; "Lab where clone FISH'd"
string placeType; "How clone was placed on the sequence assembly"
uint accCount; "Number of accessions associated with the clone"
string[accCount] accNames; "Accession associated with clone"
uint stsCount; "Number of STS markers associated with this clone"
string[stsCount] stsNames; "Names of STS markers"
uint beCount; "Number of BAC end sequences associated with this
clone"
string[beCount] beNames; "Accessions of BAC ends"
)
FlyBase Accessions Linked to UniProtKB Accessions
table flyBaseSwissProt
"FlyBase acc to UniProtKB acc, plus some other UniProtKB info"
(
string flyBaseId; "FlyBase FBgn ID"
string swissProtId; "UniProtKB ID"
string spGeneName; "Gene name from UniProtKB (long)"
string spSymbol; "Symbolic-looking gene ID from UniProtKB"
)
FlyBase Alleles
table fbAllele
"The alleles of a gene"
(
int id; "Allele ID"
string geneId; "FlyBase ID of gene"
string name; "Allele name"
)
FlyBase Gene Synonyms
table fbSynonym
"Links all the names we call a gene to its FlyBase ID"
(
string geneId; "FlyBase ID"
string name; "A name (synonym or real)"
)
FlyBase Genes
table fbGene
"Links FlyBase IDs, gene symbols and gene names"
(
string geneId; "FlyBase ID"
string geneSym; "Short gene symbol"
string geneName; "Gene name - up to a couple of words"
)
FlyBase Genes Cross-Reference (2004)
table flyBase2004Xref
"FlyBase cross references circa late 2004 (dm2/4.0; dp3/1.0 uses a subset)"
(
string name; "FlyBase annotation gene ID"
string symbol; "Symbolic gene name"
lstring synonyms; "Comma-separated list of synonyms"
string fbtr; "FlyBase FBtr acc"
string fbgn; "FlyBase FBgn acc"
string fbpp; "FlyBase FBpp acc"
string fban; "FlyBase FBan acc"
string type; "Annotation type (for noncoding only)"
)
FlyBase Genes Linked to BDGP Transcript IDs
table fbTranscript
"Links FlyBase gene IDs and BDGP transcript IDs"
(
string geneId; "FlyBase gene ID"
string transcriptId; "BDGP Transcript ID"
)
FlyBase Genes Linked to GO IDs
table fbGo
"Links FlyBase gene IDs and GO IDs/aspects"
(
string geneId; "FlyBase gene ID"
string goId; "GO ID"
string aspect; "P (process), F (function) or C (cellular component)"
)
FlyBase Observed Phenotype in Mutant
table fbPhenotype
"Observed phenotype in mutant. Sometimes contains gene function info"
(
string geneId; "FlyBase gene ID"
int fbAllele; "ID in fbAllele table or 0 if not allele-specific"
int fbRef; "ID in fbRef table"
lstring text; "Descriptive text"
)
FlyBase Reference
table fbRef
"A literature or sometimes database reference"
(
int id; "Reference ID"
lstring text; "Usually begins with FlyBase ref ID, but not always"
)
FlyBase Role of Gene in Wildtype
table fbRole
"Role of gene in wildType"
(
string geneId; "FlyBase gene ID"
int fbAllele; "ID in fbAllele table or 0 if not allele-specific"
int fbRef; "ID in fbRef table"
lstring text; "Descriptive text"
)
FlyReg Data
table flyreg
"Flyreg data from Casey Bergman"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Factor"
string target; "Target"
uint pmid; "PubMed ID"
)
FlyReg Data (version 2)
table flyreg2
"Flyreg data (version 2) from Casey Bergman"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Factor"
string target; "Target"
uint pmid; "PubMed ID"
uint fpid; "Footprint ID -- stable ID across versions"
)
Fosmid End Pairs
table fosEndPairs
"Positions of end pairs for fosmids"
(
short bin; "Bin number for browser speedup"
string chrom; "Chromosome"
uint chromStart; "Start position of fosmid in chromosome"
uint chromEnd; "End position of fosmid in chromosome"
string name; "Name of fosmid"
uint score; "Score = 1000/(# of times fosmid appears in assembly)"
char[1] strand; "Value should be + or -"
string pslTable; "Table which contains corresponding PSL records for linked
features"
uint lfCount; "Number of linked features in the series"
uint[lfCount] lfStarts; "Comma-separated list of start positions of each linked
feature in genomic"
uint[lfCount] lfSizes; "Comma-separated list of sizes of each linked feature
in genomic"
string[lfCount] lfNames; "Comma-separated list of names of linked features"
)
Gap Positions
table chrN_gap
"Gaps in golden path"
(
ushort bin; "A field to speed indexing added to many
tables August 2001"
uint matches; "Number of bases that match that aren't repeats"
uint misMatches; "Number of bases that don't match"
string chrom; "which chromosome"
uint chromStart; "start position in chromosome"
uint chromEnd; "end position in chromosome"
int ix; "ix of this fragment (useless)"
char[1] n; "always 'N'"
uint size; "size of gap"
string type; "contig, clone, fragment, etc."
string bridge; "yes, no, mrna, bacEndPair, etc."
)
GC Content
table gcPercent
"Displays GC content in 20Kb non-overlapping blocks for genome"
(
string chrom; "Chromosome number"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Constant string GC"
uint gcPpt; "Average number of G and C per 1000 bases"
)
GenBank Protein Annotations
table gbProtAnn
"Protein Annotations from GenPept mat_peptide fields"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
string product; "Protein product name"
string note; "Note (may be empty)"
string proteinId; "GenBank protein accession(.version)"
uint giId; "GenBank db_xref number"
)
Gene Bands
table geneBands
"Band locations of known genes"
(
string name; "Gene name (HUGO Gene Nomenclature Committee symbol, if possible)"
string mrnaAcc; "RefSeq mRNA accession"
int count; "Number of times this accession maps to the genome"
string[count] bands; "List of chromosome bands to which it maps"
)
Gene Boundaries as Defined by RNA and Spliced
EST Clusters
table rnaCluster
"Gene boundaries deduced from clustering spliced ESTs and mRNAs against the genome"
(
short bin; "Index"
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0-1000"
char[1] strand; "+ or -"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Always zero for now"
int blockCount; "Number of blocks (a block contains no gaps)"
int[blockCount] blockSizes; "Size of each block"
int[blockCount] chromStarts; "Start position of each block in chromosome"
)
Gene Predictions and RefSeq Genes
The following definition is used for gene prediction tables. In
alternative-splicing situations, each transcript has a row in this table.
table genePred
"A gene prediction."
(
string name; "Name of gene"
string chrom; "Chromosome name"
char[1] strand; "+ or - for strand"
uint txStart; "Transcription start position"
uint txEnd; "Transcription end position"
uint cdsStart; "Coding region start"
uint cdsEnd; "Coding region end"
uint exonCount; "Number of exons"
uint[exonCount] exonStarts; "Exon start positions"
uint[exonCount] exonEnds; "Exon end positions"
)
Gene Predictions (Extended)
The following definition is used for extended gene prediction tables. In
alternative-splicing situations, each transcript has a row in this table.
table genePredExt
"A gene prediction with some additional info."
(
string name; "Name of gene (usually transcript_id from GTF)"
string chrom; "Chromosome name"
char[1] strand; "+ or - for strand"
uint txStart; "Transcription start position"
uint txEnd; "Transcription end position"
uint cdsStart; "Coding region start"
uint cdsEnd; "Coding region end"
uint exonCount; "Number of exons"
uint[exonCount] exonStarts; "Exon start positions"
uint[exonCount] exonEnds; "Exon end positions"
uint id; "Unique identifier"
string name2; "Alternate name (e.g. gene_id from GTF)"
string cdsStartStat; "enum('none','unk','incmpl','cmpl')"
string cdsEndStat; "enum('none','unk','incmpl','cmpl')"
lstring exonFrames; "Exon frame offsets {0,1,2}"
)
Gene Predictions and RefSeq Genes with Gene Names
A version of genePred that associates the gene name with the gene prediction
information. In alternative-splicing situations, each transcript has a row in this table.
table refFlat
"A gene prediction with additional geneName field."
(
string geneName; "Name of gene as it appears in Genome Browser."
string name; "Name of gene"
string chrom; "Chromosome name"
char[1] strand; "+ or - for strand"
uint txStart; "Transcription start position"
uint txEnd; "Transcription end position"
uint cdsStart; "Coding region start"
uint cdsEnd; "Coding region end"
uint exonCount; "Number of exons"
uint[exonCount] exonStarts; "Exon start positions"
uint[exonCount] exonEnds; "Exon end positions"
)
Gene Predictions and RefSeq Genes Amino Acid Translations
The following definition is used for tables that link a predicted peptide to
a predicted gene: acemblyPep, ECgenePep, ensPep, genieAltPep, genieKnownPep,
genscanPep, knownGenePep, refPep, sanger22Pep, softberryPep, twinscanPep, vegaPep.
table pepPred
"A predicted peptide linked to a predicted gene."
(
string name; "Name of gene - same as in genePred"
lstring seq; "Peptide sequence"
)
GenMapDB BAC Clones
table genMapDb
"BAC clones from GenMapDB placed on the assembly by U Penn (V. Cheung)"
(
string chrom; "Chromosome number or 'unknown'"
int chromStart; "Start position in chromosome (-1 if unpositioned)"
int chromEnd; "End position in chromosome"
string name; "Clone name"
uint score; "Score - always 1000"
char[1] strand; "+ or -"
string accT7; "Accession number for T7 BAC end sequence"
uint startT7; "T7 start position in chromosome"
uint endT7; "T7 end position in chromosome"
char[1] strandT7; "+ or -"
string accSP6; "Accession number for Sp6 BAC end sequence"
uint startSP6; "Sp6 start position in chromosome"
uint endSP6; "Sp6 end position in chromosome"
char[1] strandSP6; "+ or -"
string stsMarker; "Name of STS marker found in clone"
uint stsStart; "STS marker start position in chromosome"
uint stsEnd; "STS marker end position in chromosome"
)
Golden Path Construction
table chrN_gold
"How to get through chromosome based on fragments"
(
ushort bin; "A field to speed indexing added to many
tables August 2001"
uint matches; "Number of bases that match that aren't repeats"
uint misMatches; "Number of bases that don't match"
string chrom; "Which chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
int ix; "Ix of this fragment (useless)"
char[1] type; "(P)redraft, (D)raft, (F)inished or (O)ther"
string frag; "Which fragment"
uint fragStart; "Start position in frag"
uint fragEnd; "End position in frag"
char[1] strand; "+ or - (orientation of fragment)"
)
H-Invitational Human Gene Annotation Database Data
table HInv
"H-Invitational gene data"
(
string geneId; "Gene ID"
string clusterId; "Gene cluster ID"
string mrnaAcc; "GenBank mRNA accession"
)
HgFind Search Information
table hgFindSpec
"Defines a search to be performed by hgFind"
(
string searchName; "Unique name for this search. Defaults to searchTable
if not specified in .ra"
string searchTable; "(Non-unique!) Table to be searched. (Like trackDb.tableName:
if split, omit chr*_ prefix)"
string searchMethod; "Type of search (exact, prefix, fuzzy)"
string searchType; "Type of search (bed, genePred, knownGene etc)"
ubyte shortCircuit; "If nonzero and there is a result from this search, jump
to the result instead of performing other searches"
string termRegex; "Regular expression (see man 7 regex) to eval on search
term: if it matches, perform search query"
string query; "sprintf format string for SQL query on a given table
and value"
string xrefTable; "If search is xref, perform xrefQuery on search term,
then query with that result"
string xrefQuery; "sprintf format string for SQL query on a given (xref)
table and value"
float searchPriority; "0-1000 - relative order/importance of this search.
0 is top"
string searchDescription; "Description of table/search (default: trackDb
{longLabel,tableName})"
lstring searchSettings; "Name/value pairs for searchType-specific stuff"
)
Hugo Gene Nomenclature Committee (HGNC) Cross-Reference
table hugo
"A cross-reference table between HGNC and other databases"
(
string hgnc; "HGNC number"
string symbol; "HGNC gene symbol"
string name; "HGNC gene name"
string map; "Gene map location"
string mim; "MIM ID"
string pmid1; "pmid 1"
string pmid2; "pmid 2"
string refseq; "RefSeq ID"
string aliases; "Aliases"
string widthdraws; "Withdrawn gene symbols"
string locuslink; "LocusLink ID"
string gdbID; "GDB ID"
string swissprot; "UniProtKB ID"
)
Human Synteny Alignments
table syntenyHuman
"Human synteny alignments from blastz single coverage"
(
string chrom; "Mouse Chrom"
uint chromStart; "Start on Mouse"
uint chromEnd; "End on Mouse"
string name; "Human Chromosome"
uint score; "Score (always zero)"
char[1] strand; "+ (direction matches) or - (opposite direction)"
uint thickStart; "Start of where display should be thick"
uint thickEnd; "End of where display should be thick"
)
ImageClone
table imageClone
"Used with image consortium's cumulative_plate files: ftp://image.llnl.gov/image/outgoing"
(
uint id; "IMAGE clone ID"
string library; "Clone collection (LLAM for amp-resistant libraries,
LLCM for chloramphenicol-resistant libraries, LLKM
for kanamycin-resistant libraries. No rearray
locations are given.)"
uint plateNum; "Plate number"
string row; "Row"
uint column; "Column"
uint libId; "IMAGE library ID"
string organism; "Species"
int numGenbank; "Number of GenBank records"
string[numGenbank] genbankIds; "GenBank accession number(s)"
)
InterPro Cross Reference
table interProXref
"A cross-reference table for InterPro."
(
string accession; "UniProtKB accession number"
string method; "Method"
int start; "Start position"
int end; "End position"
string interProId; "InterPro ID"
string description; "Description"
)
Jackson Lab Mouse Orthologs
table jaxOrtholog
"Jackson Lab Mouse Orthologs"
(
string humanSymbol; "Human HUGO Gene Nomenclature Committee symbol"
string humanBand; "Human chromosomal location"
string mgiId; "Mouse database id"
string mouseSymbol; "Mouse human symbol"
string mouseChr; "Mouse chromosome"
string mouseCm; "Mouse genetic map position in centimorgans"
string mouseBand; "Mouse chromosome band if any"
)
Jackson Lab Quantitative Trait Loci
table jaxQTL
"Quantitative Trait Loci from Jackson Lab/Mouse Genome Informatics"
(
string chrom; "Human chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0-1000 (bed6 compat.)"
char[1] strand; "+ or - (bed6 compat.)"
string marker; "MIT SSLP Marker w/highest correlation"
string mgiID; "MGI ID"
string description; "MGI description"
float cMscore; "cM position of marker associated with peak LOD score"
)
Kegg Pathway Map Description
table keggMapDesc
"Description of KEGG pathway map"
(
string mapID; "KEGG pathway map ID"
string description; "KEGG pathway map description"
)
Kegg Pathway Cross Reference
table keggPathway
"Associates KEGG pathway IDs with Known Genes and LocusLink IDs"
(
string kgID; "Known Gene ID"
string locusID; "LocusLink ID"
string mapID; "KEGG pathway map ID"
)
Known Canonical
table knownCanonical
"Describes the canonical splice variant of a gene"
(
string chrom; "Chromosome"
int chromStart; "Start position (0 based). Corresponds to txStart"
int chromEnd; "End position (non-inclusive). Corresponds to txEnd"
int clusterId; "Which cluster of transcripts this belongs to in knownIsoforms"
string transcript; "Corresponds to knownGene name field"
string protein; "UniProtKB ID of associated protein"
)
Known Genes
table knownGene
"Protein coding genes based on proteins from UniProtKB
and their corresponding mRNAs from GenBank"
(
string name; "Name of gene"
string chrom; "Chromosome name"
char[1] strand; "+ or - for strand"
uint txStart; "Transcription start position"
uint txEnd; "Transcription end position"
uint cdsStart; "Coding region start"
uint cdsEnd; "Coding region end"
uint exonCount; "Number of exons"
uint[exonCount] exonStarts; "Exon start positions"
uint[exonCount] exonEnds; "Exon end positions"
string proteinID; "UniProtKB ID"
string alignID; "Unique identifier for each (known gene, alignment
position) pair"
)
Known Genes Alias
table kgAlias
"Links together a Known Gene ID and a gene alias"
(
string kgID; "Known Gene ID"
string alias; "A gene alias"
)
Known Genes Associations
The following definition describes tables that associate Known Genes with other
IDs and accessions.
table knownTo
"Maps Known Gene to another ID"
(
string name; "Same as name field in known gene"
string value; "Other id"
)
Known Genes Cross Reference
table kgXref
"Links a Known Gene ID with mRNA, UniProtKB, RefSeq, and NCBI accessions/IDs"
(
string kgID; "Known Gene ID"
string mRNA; "mRNA ID"
string spID; "UniProtKB protein Accession number"
string spDisplayID; "UniProtKB display ID"
string geneSymbol; "Gene Symbol"
string refseq; "RefSeq ID"
string protAcc; "NCBI protein Accession number"
string description; "Description"
)
Known Genes Isoforms
The following definition describes tables that link together various transcripts
of a gene into a cluster.
table knownIsoforms
"Links together various transcripts of a gene into a cluster"
(
int clusterId; "Unique id for transcript cluster (aka gene)"
string transcript; "Corresponds to name in knownGene table, transcript in
knownCanonical"
)
Known Genes Link
table knownGeneLink
"Known Genes link table, currently storing DNA-based entries only"
(
string name; "Known Genes ID"
char[1] seqType; "Known Genes sequence type - mRNA-based genes=m, DNA-based genes=g"
string proteinID; "Corresponding protein ID"
)
Known Genes Protein Alias
table kgProtAlias
"Links together a Known Gene ID and a protein alias"
(
string kgID; "Known Gene ID"
string displayID; "protein display ID"
string alias; "A protein alias"
)
Known Genes Superfamily Link
table knownToSuper
"Maps protein superfamilies to known genes"
(
string gene; "Known gene ID"
int superfamily; "Superfamily ID"
int start; "Start of superfamily domain in protein (0 based)"
int end; "End (noninclusive) of superfamily domain"
float eVal; "E value of superfamily assignment"
)
Known Genes UniProtKB Alias
table kgSpAlias
"Links together a Known Gene ID and a protein alias"
(
string kgID; "Known Gene ID"
string spID; "UniProtKB protein accession number"
string alias; "Alias - could be either a gene alias or protein alias"
)
Known (RefSeq) Genes IDs in Other Databases
Last used Oct. 2000
table knownMore
"Lots of auxiliary info about a known gene"
(
string name; "The name displayed in the browser:
OMIM, gbGeneName, or transId"
string transId; "Transcript ID. Genie-generated ID"
string geneId; "Gene (not transcript) Genie ID"
uint gbGeneName; "Connect to geneName table. GenBank gene name"
uint gbProductName; "Connects to productName table. GenBank product name"
string gbProteinAcc; "GenBank accession of protein"
string gbNgi; "GenBank GI of nucleotide sequence"
string gbPgi; "GenBank GI of protein sequence"
uint omimId; "OMIM ID or 0 if none"
string omimName; "OMIM primary name"
uint hugoId; "HUGO Gene Nomenclature Committee (HGNC) ID, or 0 if none"
string hugoSymbol; "HGNC short name"
string hugoName; "HGNC descriptive name"
string hugoMap; "HGNC Map position"
uint pmId1; "Data from HGNC nomeids.txt"
uint pmId2; "Data from HGNC nomeids.txt"
string refSeqAcc; "Accession of RefSeq mRNA"
string aliases; "Aliases, if any. Comma and space separated list"
uint locusLinkId; "Locus link ID"
string gdbId; "NCBI GDB database ID"
)
Known (RefSeq) Genes - Other Information
Last used Oct. 2000
table knownInfo
"Auxiliary info about a known gene"
(
string name; "Connects with genieKnown->name"
string transId; "Transcript id. Genie-generated ID"
string geneId; "Gene (not transcript) ID"
uint geneName; "Connect to geneName table"
uint productName; "Connects to productName table"
string proteinId; "GenBank accession of protein?"
string ngi; "GenBank gi of nucleotide seq"
string pgi; "GenBank gi of protein seq"
)
MGC Full Status
table mgcFullStatus
"Status of MGC clone"
(
uint imageId; "Image ID for clone"
enum status; "MGC status code: unpicked, picked, notBack, noDecision,
fullLength, incomplete, chimeric, frameShift, contaminated,
retainedIntron, mixedWells, noGrowth, noInsert, no5est,
microDel, artifact, noPolyATail, or cantSequence. Status will
always be fullLength on main databases."
char[12] acc; "GenBank accession"
char[2] organism; "Two-letter MGC organism"
)
MGI ID
table MGIid
"Links MGI ID to Locus Link ID (Mouse)"
(
char[8] LLid; "Locus Link ID"
char[12] MGIid; "MGI ID"
char[32] symbol; "Symbol"
)
Microsatellites from Gerome Breen's VNTR Program
table vntr
"Microsatellites from Gerome Breen's VNTR program"
(
string chrom; "chrom"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item (Repeat unit)"
float repeatCount; "Number of perfect repeats"
int distanceToLast; "Distance to previous microsat. repeat"
int distanceToNext; "Distance to next microsat. repeat"
string forwardPrimer; "Forward PCR primer sequence (or Design_Failed)"
string reversePrimer; "Reverse PCR primer sequence (or Design_Failed)"
string pcrLength; "PCR product length (or Design_Failed)"
)
Mouse Synteny
Last used Apr. 2001.
table mouseSyn
"Synteny between mouse and human chromosomes."
(
string chrom; "Name of chromosome"
uint chromStart; "Start in chromosome"
uint chromEnd; "End in chromosome"
string name; "Name of mouse chromosome"
int segment; "Number of segment"
)
Mouse Synteny Alignments
table syntenyMouse
"Mouse synteny alignments from blastz single coverage"
(
string chrom; "Human chromosome"
uint chromStart; "Start on human chromosome"
uint chromEnd; "End on human chromosome"
string name; "Mouse chromosome"
uint score; "Score (always zero)"
char[1] strand; "+ (direction matches) or - (opposite direction)"
uint thickStart; "Start of where display should be thick"
uint thickEnd; "End of where display should be thick"
)
Mouse Synteny (Whitehead)
table mouseSynWhd
"Whitehead synteny between mouse and human chromosomes."
(
string chrom; "Human chromosome"
uint chromStart; "Start position in human chromosome"
uint chromEnd; "End position in human chromosome"
string name; "Name of mouse chromosome"
uint score; "Unused (bed 6 compatibility)"
char[1] strand; "+ or - (orientation of fragment)"
uint mouseStart; "Start position in mouse chromosome"
uint mouseEnd; "End position in mouse chromosome"
string segLabel; "Whitehead segment label"
)
mRNA Sequences
table mrnaPred
"A mRNA - linked to a gene."
(
string name; "Name of gene - same as in genePred"
lstring seq; "mRNA sequence"
)
mrnaRefSeq
table mrnaRefseq
"Associates mRNA IDs with RefSeq IDs"
(
string mrna; "mRNA ID"
string refseq; "RefSeq ID"
)
mRNA/EST/Blat Alignments
The following definition is used for the blat and blastz tables, EST tables, mRNA
tables, dbtssAli, and uniGene.
table psl
"Summary info about a patSpace alignment"
(
ushort bin; "A field to speed indexing"
uint matches; "Number of bases that match that aren't repeats"
uint misMatches; "Number of bases that don't match"
uint repMatches; "Number of bases that match but are part of repeats"
uint nCount; "Number of 'N' bases"
uint qNumInsert; "Number of inserts in query"
int qBaseInsert; "Number of bases inserted in query"
uint tNumInsert; "Number of inserts in target"
int tBaseInsert; "Number of bases inserted in target"
char[2] strand; "+ or - for query strand. For translated alignments,
second +/- is for genomic strand"
string qName; "Query sequence name"
uint qSize; "Query sequence size"
uint qStart; "Alignment start position in query"
uint qEnd; "Alignment end position in query"
string tName; "Target sequence name"
uint tSize; "Target sequence size"
uint tStart; "Alignment start position in target"
uint tEnd; "Alignment end position in target"
uint blockCount; "Number of blocks in alignment (a block contains no gaps)"
uint[blockCount] blockSizes; "Size of each block"
uint[blockCount] qStarts; "Start of each block in query"
uint[blockCount] tStarts; "Start of each block in target"
)
There is a little gotcha in this table format. It has to
do with how coordinates are handled on the negative strand.
In the qStart/qEnd fields the coordinates are where it
matches from the point of view of the forward strand
(even when the match is on the reverse strand). However
on the qStarts[] list, the coordinates are reversed.
Here's an example of a 31-mer that has 2 blocks that
align on the minus strand and 2 blocks on the plus
strand (these types of situations happen in real life in
response to assembly errors sometimes).
0         1         2         3 tens position in query
0123456789012345678901234567890 ones position in query
            ++++          +++++ plus strand alignment on query
    --------    ----------      minus strand alignment on query

Plus strand:
qStart 12 qEnd 31 blockSizes 4,5 qStarts 12,26
Minus strand:
qStart 4 qEnd 26 blockSizes 10,8 qStarts 5,19
Essentially the minus strand blockSizes and qStarts are
what you would get if you reverse complemented the query.
However the qStart and qEnd are non-reversed. To get
from one to the other:
qStart = qSize - revQEnd
qEnd = qSize - revQStart
Net Alignments
table netAlign
"Database representation of a net of alignments"
(
uint level; "Level of alignment"
string tName; "Target chromosome"
uint tStart; "Start on target"
uint tEnd; "End on target"
char[1] strand; "Orientation of query (+ or -)"
string qName; "Query chromosome"
uint qStart; "Start on query"
uint qEnd; "End on query"
uint chainId; "Associated chain ID with alignment details"
uint ali; "Bases in gap-free alignments"
double score; "Score - a number proportional to 100x matching bases"
int qOver; "Overlap with parent gap on query side (-1 for undefined)"
int qFar; "Distance from parent gap on query side (-1 for undefined)"
int qDup; "Bases with two or more copies in query (-1 for undefined)"
string type; "Syntenic type: gap/top/syn/nonsyn/inv"
int tN; "Unsequenced bases on target (-1 for undefined)"
int qN; "Unsequenced bases on query (-1 for undefined)"
int tR; "RepeatMasker bases on target (-1 for undefined)"
int qR; "RepeatMasker bases on query (-1 for undefined)"
int tNewR; "Lineage specific repeats on target (-1 for undefined)"
int qNewR; "Lineage specific repeats on query (-1 for undefined)"
int tOldR; "Bases of ancient repeats on target (-1 for undefined)"
int qOldR; "Bases of ancient repeats on query (-1 for undefined)"
int tTrf; "Bases of tandem repeats on target (-1 for undefined)"
int qTrf; "Bases of tandem repeats on query (-1 for undefined)"
)
Non-standard Join Certificates
table certificate
"Non-standard Join Certificates"
(
string accession1; "First accession number"
string accession2; "Second accession number"
string spanner; "Spanner"
string evaluation; "Evaluation"
string variation; "Variation"
string varEvidence; "Variation evidence"
string contact; "Contact info"
string remark; "Remarks"
string comment; "Comments"
)
Orientation Info
The following definition is used by the mrnaOrientInfo and estOrientInfo
tables. It contains extra information on mRNAs/ESTs calculated by the
polyInfo program.
table estOrientInfo
"Extra information on mRNAs/ESTs calculated by polyInfo program"
(
int bin; "A field to speed indexing"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Accession of EST/mRNA"
short intronOrientation; "Orientation of introns with respect to EST/mRNA"
short sizePolyA; "Number of trailing A's"
short revSizePolyA; "Number of trailing A's on reverse strand"
short signalPos; "Position of start of polyA signal relative to end
of EST/mRNA, or 0 if no signal"
short revSignalPos; "PolyA signal position on reverse strand if any"
)
Pfam Description
table pfamDesc
"Pfam description table"
(
string pfamAC; "Pfam accession number"
string pfamID; "Pfam ID"
string description; "Description"
)
Pfam and UniProtKB Cross Reference
table pfamXref
"A cross-reference table between Pfam ID and UniProtKB IDs."
(
string pfamAC; "Pfam accession number"
string swissAC; "UniProtKB accession number"
string swissDisplayID; "UniProtKB display ID"
)
Proteome Browser Amino Acid Distribution
table pbAaDistX
"Distribution for a specific amino acid X"
(
float x; "x value"
float y; "count"
)
Proteome Browser Amino Acid Anomaly Limits
table pbAnomLimit
"Protein amino acid anomaly limits for each AA"
(
char[1] AA; "Amino Acid"
float pctLow; "Percentage (100%=1.0) Lower bound"
float pctHi; "Percentage (100%=1.0) Upper bound"
)
Proteome Browser Cysteine Count Distribution
table pepCCntDist
"Cysteine count distribution"
(
float x; "Number of Cysteines"
float y; "Count of proteins with x Cysteines"
)
Proteome Browser Exon Count Distribution
table pepExonCntDist
"Exon count distribution"
(
float x; "Number of exons"
float y; "Count of proteins with x exons"
)
Proteome Browser Hydrophobicity Distribution
table pepHydroDist
"Hydrophobicity distribution"
(
float x; "Hydrophobicity value"
float y; "Count of proteins with hydrophobicity near x"
)
Proteome Browser InterPro Domain Count Distribution
table pepIPCntDist
"InterPro domain count distribution"
(
float x; "Number of InterPro domains"
float y; "Count of proteins with x InterPro domains"
)
Proteome Browser Molecular Weight and AA Length of Proteins
table pepMwAa
"Molecular weight and AA length of proteins"
(
string accession; "UniProtKB protein accession number"
float molWeight; "Molecular weight"
int aaLen; "Length of protein sequence"
)
Proteome Browser Molecular Weight Distribution
table pepMolWtDist
"Molecular weight distribution"
(
float x; "Molecular weight value"
float y; "Count of proteins with molecular weight near x"
)
Proteome Browser pI Distribution
table pepPiDist
"pI distribution"
(
float x; "pI value"
float y; "Count of proteins with pI value near x"
)
Proteome Browser Predicted Peptide/Predicted Gene
table pepPred
"Predicted peptide linked to predicted gene"
(
string name; "Name of gene - same as in genePred"
lstring seq; "Peptide sequence"
)
Proteome Browser Protein pIs
table pepPi
"Protein pIs"
(
string accession; "Protein accession number"
float pI; "pI value"
)
Proteome Browser Residue Average/Standard Deviation
table pbResAvgStd
"Residue average and standard deviation"
(
char[1] residue; "Protein residue"
float avg; "Average"
float stddev; "Standard deviation"
)
Proteome Browser Residue Distribution
table pepResDist
"Residue distribution"
(
float x; "Residue index (WCMHYNFIDQKRTVPGEASL) "
float y; "Count of proteins with residue index of x"
)
Proteome Browser Stamp Information
table pbStamp
"Info needed for a Proteome Browser stamp"
(
char[40] stampName; "Short Name of stamp"
char[40] stampTable; "Database table name of the stamp (distribution) data"
char[40] stampTitle; "Stamp Title to be displayed"
int len; "Number of x-y pairs"
float xmin; "x minimum"
float xmax; "x maximum"
float ymin; "y minimum"
float ymax; "y maximum"
string stampDesc; "Description of the stamp"
)
PDB and UniProtKB ID links
table pdbSP
"A cross-reference table between UniProtKB accession IDs and PDB IDs."
(
string pdb; "PDB ID"
string sp; "UniProtKB display ID"
)
P-Screen Data (BDGP Gene Disruption Project)
table pscreen
"P-Screen (BDGP Gene Disruption Project) P el. insertion locations/genes"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item (mutant strain with P el. insert here)"
uint score; "Score from 0-1000 (placeholder! for bed 6 compat)"
char[1] strand; "+ or -"
uint stockNumber; "Mutant strain stock number, for ordering"
uint geneCount; "Number of genes disrupted by this insert"
string[geneCount] geneIds; "IDs of disrupted genes"
)
Pseudogene Link to Ortholog/Paralog
table pseudoGeneLink
"Links a gene/pseudogene prediction to an ortholog or paralog."
(
string chrom; "Chromosome name for pseudogene"
uint chromStart; "Pseudogene alignment start position"
uint chromEnd; "Pseudogene alignment end position"
string name; "Name of pseudogene"
uint score; "Score of pseudogene with gene"
char[2] strand; "+ or -"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Always zero for now"
int blockCount; "Number of blocks (a block contains no gaps)"
int[blockCount] blockSizes; "Comma-separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
float trfRatio; "Ratio of tandem repeats"
string type; "Type of evidence"
int axtScore; "Blastz score, gene mRNA aligned to pseudogene"
string gChrom; "Chromosome name"
int gStart; "Gene alignment start position"
int gEnd; "Gene alignment end position"
char[2] gStrand; "Strand of gene"
uint exonCount; "# of exons in gene"
uint geneOverlap; "Bases overlapping"
uint polyA; "Length of polyA"
int polyAstart; "Start of polyA relative to end of pseudogene"
uint exonCover; "Number of exons in gene covered"
uint intronCount; "Number of introns in pseudogene"
uint bestAliCount; "Number of good mRNAs aligning"
uint matches; "Matches + repMatches"
uint qSize; "Aligning bases in pseudogene"
uint qEnd; "End of cDNA alignment"
uint tReps; "Repeats in gene"
uint qReps; "Repeats in pseudogene"
uint overlapDiag; "Bases on the diagonal to mouse"
uint coverage; "Bases on the diagonal to mouse"
int label; "1=pseudogene,-1 not pseudogene"
uint milliBad; "MilliBad score, pseudogene aligned to genome"
uint oldScore; "Another heuristic"
int oldIntronCount; "Old simple intron count"
int conservedIntrons; "Conserved intron count"
string intronScores; "Intron sizes in gene/pseudogene"
int maxOverlap ; "Largest overlap with another mRNA"
string refSeq; "Name of closest RefSeq to gene"
int rStart; "RefSeq alignment start position"
int rEnd; "RefSeq alignment end position"
string mgc; "Name of closest MGC to gene"
int mStart; "MGC alignment start position"
int mEnd; "MGC alignment end position"
string kgName; "Name of closest Known Gene to gene"
int kStart; "Known Gene alignment start position"
int kEnd; "Known Gene alignment end position"
string overName; "Name of overlapping mRNA"
int overStart; "Overlapping mRNA start position"
int overEnd; "Overlapping mRNA end position"
char[2] overStrand; "Strand of overlapping mRNA"
int adaBoost; "AdaBoost label"
float posConf; "P-value for positive"
float negConf; "P-value for negative"
)
RankProp Score
table rankProp
"RankProp protein ranking for a pair of proteins"
(
string query; "Known Genes ID of query protein"
string target; "Known Genes ID of target protein"
float score; "Rankp score"
)
Rat Synteny Alignments
table syntenyRat
"Rat synteny alignments from blastz single coverage"
(
string chrom; "Human chromosome"
uint chromStart; "Start on human chromosome"
uint chromEnd; "End on human chromosome"
string name; "Rat chromosome"
uint score; "Score (always zero)"
char[1] strand; "+ (direction matches) or - (opposite direction)"
uint thickStart; "Start of where display should be thick"
uint thickEnd; "End of where display should be thick"
)
Recombination Rate
table recombRate
"Describes the recombination rate in 1Mb intervals based on deCODE, Marshfield and
Genethon maps"
(
string chrom; "Chromosome number"
uint chromStart; "Start position in genoSeq"
uint chromEnd; "End position in genoSeq"
string name; "Constant string 'recombRate'"
float decodeAvg; "Calculated deCODE Sex-averaged rate from map"
float decodeFemale; "Calculated deCODE female recombination rate"
float decodeMale; "Calculated deCODE male recombination rate"
float marshfieldAvg; "Calculated Marshfield recombination rate"
float marshfieldFemale; "Calculated Marshfield female recombination"
float marshfieldMale; "Calculated Marshfield male recombination rate"
float genethonAvg; "Calculated Genethon recombination rate"
float genethonFemale; "Calculated Genethon female recombination rate"
float genethonMale; "Calculated Genethon male recombination rate"
)
RefSeq Link
First used Dec. 2000
table refLink
"Link between RefSeq mRNAs and HUGO Gene Nomenclature Committee, LocusLink etc."
(
string name; "Name displayed in UI"
string product; "Name of protein product"
string mrnaAcc; "mRNA accession"
string protAcc; "Protein accession"
uint geneId; "Pointer to geneName table"
uint prodId; "Pointer to prodName table"
uint locusLinkId; "Locus Link ID"
uint omimId; "OMIM ID"
)
RefSeq Status
First used Dec. 2001
table refSeqStatus
"Links RefSeq mRNA accessions with status"
(
string mrnaAcc; "RefSeq mRNA accession"
string status; "RefSeq status (Reviewed, Provisional, Predicted)"
)
RefSeq Summary
First used Oct. 2003
table refSeqSummary
"Sparse summary table created for refSeqs with 'Summary:' or 'COMPLETENESS:' in comment"
(
string mrnaAcc; "RefSeq mRNA accession"
enum completeness; "Level of completeness: Unknown, Complete5End,
Complete3End, FullLength, IncompleteBothEnds,
Incomplete5End, Incomplete3End, Partial"
string summary; "Summary text"
)
Regulatory Module TFBS Predictions (Eran Segal)
table esRegGeneToMotif
"Browser extensible data with gene field"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Motif name"
uint score; "Score from 0-1000"
char[1] strand; "+ or -"
string gene; "Gene name"
)
Repeating Elements
Table created from RepeatMasker .out files.
table rmsk
"RepeatMasker .out record"
(
uint swScore; "Smith Waterman alignment score"
uint milliDiv; "Base mismatches in parts per thousand"
uint milliDel; "Bases deleted in parts per thousand"
uint milliIns; "Bases inserted in parts per thousand"
string genoName; "Genomic sequence name"
uint genoStart; "Start in genomic sequence"
uint genoEnd; "End in genomic sequence"
int genoLeft; "Size left in genomic sequence"
char[1] strand; "Relative orientation + or -"
string repName; "Name of repeat"
string repClass; "Class of repeat"
string repFamily; "Family of repeat"
int repStart; "Start in repeat sequence"
uint repEnd; "End in repeat sequence"
int repLeft; "Size left in repeat sequence"
char[1] id; "First digit of id field in RepeatMasker .out file. Best ignored."
)
Riken Altid Table
table rikenaltid
"Riken altid table"
(
char[64] ID; "ID (composite)"
char[32] SeqID; "Sequence ID"
char[32] AltIDType; "Alternate ID type"
string AltID; "Alternate ID"
)
Riken Annotation Table
table rikenann
"Riken annotation table"
(
string ID; "ID"
string seqid; "parent sequence id"
uint i; "sequential number"
string Qualifier; "Qualifier"
string Anntext; "Anntext"
string Datasrc; "Datasrc"
string Srckey; "Srckey"
string Href; "xlink_href"
string Evidence; "Evidence"
)
Riken Cluster Table
table rikencluster
"Riken cluster table"
(
string ID; "ID"
string Fantomid; "Fantom ID"
string RepresentativeSeqid; "Representative sequence ID"
uint Nsequence; "Number of sequences"
)
Riken Cluster Sequence Regulation Table
table rikenclusterseq
"Riken cluster sequence regulation table"
(
string ClusterID; "Cluster ID"
string SeqID; "Sequence ID"
)
Riken Sequence Table
table rikenseq
"Riken sequence table"
(
string ID; "ID"
uint Naltid; "Number of altid"
string Seqid; "sequence ID (no more valid per Riken DTD)"
string Fantomid; "Fantom ID (no more valid per Riken DTD)"
string Cloneid; "Clone ID (no more valid per Riken DTD)"
string Rearrayid; "Rearray ID (no more valid per Riken DTD)"
string Accession; "Accession (no more valid per Riken DTD)"
string Annotator; "Annotator"
string version; "Version"
string ModifiedTime; "Modified Time"
uint Nannotation; "Number of Annotations"
string Comment; "Comment"
)
RNA Folding Info
table rnaFold
"Info about folding of RNA into secondary structure"
(
string name; "mRNA accession"
lstring seq; "mRNA sequence (U's instead of T's)"
lstring fold; "Parenthesis and .'s that describe folding"
float energy; "Estimated free energy of folding (negative)"
)
RNA Gene Positions
Last used Apr. 2001.
table rnaGene
"Describes functional RNA genes."
(
string chrom; "Chromosome gene is on"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of gene"
uint score; "Score from 0 to 1000"
char[1] strand; "Strand: + or -"
string source; "Source as in Sean Eddy's files."
string type; "Type - snRNA, rRNA, tRNA, etc."
float fullScore; "Score as in Sean Eddy's files."
ubyte isPsuedo; "TRUE(1) if pseudo, FALSE(0) otherwise"
)
RNA - Weber and Griffiths-Jones
table wgRna
"CD and H/ACA Box snoRNAs and microRNAs from Weber and Griffiths-Jones"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0 to 1000 (bed6 compat.)"
char[1] strand; "+ or - (bed6 compat.)"
uint thickStart; "Start of thick region"
uint thickEnd; "End of thick region"
string type; "RNA type"
)
Rosetta Information
table rosettaExps
"Rosetta Experimental Confirmation information"
(
int ID; "ID"
string name; "Name"
lstring description; "Description"
lstring URL; "URL"
lstring ref; "Reference"
lstring credit; "Credit"
uint numExtras; "Number of extras"
lstring extra; "Extra information"
)
Saccharomyces Genome Database Protein Abundance
table sgdAbundance
"Protein abundance data from http://yeastgfp.ucsf.edu via SGD"
(
string name; "ORF name in sgdGene table"
float abundance; "Absolute abundance from 41 to 1590000"
string error; "Error - either a floating point number or blank"
)
Saccharomyces Genome Database Clone Information
table sgdClone
"Clone info from yeast genome database"
(
string chrom; "Chromosome in chrN format"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Washington University name"
string atccName; "ATCC clone name (optional)"
)
Saccharomyces Genome Database Genes Description
table sgdDescription
"Description of SGD Genes and Other Features"
(
string name; "Name in sgdGene or sgdOther table"
string type; "Type of feature from gff3 file"
lstring description; "Description of feature"
)
Saccharomyces Genome Database Other Features
table sgdOther
"Features other than coding genes from yeast genome database"
(
string chrom; "Chromosome in chrNN format"
int chromStart; "Start (zero based)"
int chromEnd; "End (non-inclusive)"
string name; "Feature name"
int score; "Always 0"
char[1] strand; "Strand: +, - or ."
string type; "Feature type"
)
Sage
table sage
"Stores SAGE data in terms of UniGene identifiers"
(
int uni; "Number portion of UniGene identifier"
string gb; "GenBank accession number"
string gi; "gi field in UniGene descriptions"
lstring description; "Description from UniGene fasta headers"
int numTags; "Number of tags"
string[numTags] tags; "Tags for this unique sequence"
int numExps; "Number of experiments"
int[numExps] exps; "Index of experiments in order of aves and stdevs"
float[numExps] meds; "Median count of all tags for each experiment"
float[numExps] aves; "Average count of all tags for each experiment"
float[numExps] stdevs; "Standard deviations of all counts for each experiment"
)
Sage Experiments
table sageExp
"Data related to SAGE experiments, tissue descriptions, etc."
(
int num; "Index of the experiment in the sage table"
string exp; "Experiment name"
int totalCount; "Sum of all the tag counts for this experiment"
string tissueType; "Brief description of cells used"
string tissueDesc; "Brief description of tissues"
string tissueSupplier; "Who supplied the tissue"
string organism; "Organism source of cells"
string organ; "Organ identifier"
lstring producer; "Source of tissue"
lstring desription; "Description of experiment"
)
SAM Protein Homolog Data
table protHomolog
"table to store SAM homolog results"
(
string proteinID; "Protein ID"
string homologID; "Homolog ID"
char[1] chain; "Chain"
int length; "Length of protein sequence"
double bestEvalue; "Best E-value"
double evalue; "E-value"
string FSSP; "FSSP ID"
string SCOPdomain; "SCOP domain ID"
string SCOPsuid; "SCOP sunid"
)
SAM Data Subdirectory Linked to Known Gene ID
table samSubdir
"Link together a Known Gene ID and a gene alias"
(
string proteinId; "Protein ID"
string subdir; "Name of the subdirectory where SAM results are"
)
Sample Tracks for Plotting Real-valued Data
table sample9
"Describes (x,y) pairs of samples"
(
ushort bin; "A field to speed indexing added to many
tables August 2001"
string chrom; "Chromosome alignment is on."
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Unique index given to each block of samples"
uint score; "Overall block score from 0 to 1000 (chrN_humMusL
only)"
char[1] strand; "Strand: + or -"
uint sampleCount; "Number of samples in this block"
uint[sampleCount] samplePosition; "Base position of each sample in this
block, relative to chromStart"
uint[sampleCount] sampleHeight; "Score for each sample (y-value) from 0 to 1000"
)
Sample Tracks for Plotting Real-valued Data (Rev 2)
table wiggle
"Wiggle track values to display as y-values (first 6 fields are bed6)"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint span; "Each value spans this many bases"
uint count; "Number of values in this block"
uint offset; "Offset in file to fetch data"
string file; "Path name to data file, one byte per value"
double lowerLimit; "Lowest data value in this block"
double dataRange; "lowerLimit + dataRange = upperLimit"
uint validCount; "Number of valid data values in this block"
double sumData; "Sum of the data points, for average and stddev calc"
double sumSquares; "Sum of data points squared, for stddev calc"
)
Sanger 20 and 22 Genes - Additional Information
table sanger22extra
"Table with additional information about a Sanger 22 gene"
(
string name; "Transcript name"
string locus; "Possibly biological short name"
lstring description; "Description from Sanger gene GFFs"
string geneType; "Type field from Sanger gene GFFs"
string cdsType; "Type field from Sanger CDS GFFs"
)
Sanger Genes (WormBase)
table sangerGene
"GenePred table with proteinID field for WormBase Genes."
(
string name; "Name of gene"
string chrom; "Chromosome name"
char[1] strand; "+ or - for strand"
uint txStart; "Transcription start position"
uint txEnd; "Transcription end position"
uint cdsStart; "Coding region start"
uint cdsEnd; "Coding region end"
uint exonCount; "Number of exons"
uint[exonCount] exonStarts; "Exon start positions"
uint[exonCount] exonEnds; "Exon end positions"
string proteinID; "UniProtKB protein ID"
)
SCOP Descriptions
table scopDes
"Structural Classification of Proteins description"
(
int sunid; "Unique integer"
char[2] type; "Type. sf=superfamily, fa = family, etc."
string sccs; "Dense hierarchy info."
string sid; "Older ID."
lstring description; "Descriptive text."
)
Scored Ref
table scoredRef
"A score, a range of positions in the genome and an extFile offset"
(
int bin; "Field used to speed indexing"
string chrom; "Chromosome (this species)"
uint chromStart; "Start position in chromosome (forward strand)"
uint chromEnd; "End position in chromosome"
uint extFile; "Pointer to associated external file"
bigint offset; "Offset in external file"
float score; "Value between 0.0 and 1.0 that loosely reflects % identity ratio.
Derived from multiz score by dividing it by the number of pairwise
comparisons in the multiple alignment and by the number of bases
in the alignment."
)
Segmental Duplications
table genomicSuperDups
"Summary of large genomic Duplications (>1KB >90% similar)"
(
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Other chromosome involved"
uint score; "Score from 900-1000. 1000 is best"
char[1] strand; "Value should be + or -"
string otherChrom; "Other chromosome or FPC contig"
uint otherStart; "Start in other sequence"
uint otherEnd; "End in other sequence"
uint otherSize; "Total size of other sequence"
uint uid; "Unique ID"
uint posBasesHit; "HitPositive UnCovered"
string testResult; "HitPositive (yes or no) UnCovered (covered=0)"
string verdict; "Real or Allele"
string alignfile; "Alignment file path"
uint alignL; "Spaces/positions in alignment"
uint indelN; "Number of indels"
uint indelS; "Indel spaces"
uint alignB; "Bases Aligned"
uint matchB; "Aligned bases that match"
uint mismatchB; "Aligned bases that do not match"
uint transitionsB; "Number of transitions"
uint transversionsB; "Number of transversions"
float fracMatch; "Fraction of matching bases"
float fracMatchIndel; "Fraction of matching bases with indels"
float jcK; "K-value calculated with Jukes-Cantor"
float k2K; "Kimura K"
)
Seq
table seq
"Locations of database table sequences referenced in external fasta files"
(
int id; "Sequence ID"
string acc; "Accession or name of sequence"
int size; "Size of sequence"
date gb_date; "Modification date for genbank mRNAs/ESTs"
int extFile; "ID of file in extFile table"
int file_offset; "Byte offset of sequence start in fasta file"
int file_size; "Size of sequence record in fasta file (includes fasta
sequence header)"
)
Simple Repeats
This table was generated by the trf program. It does a more thorough job
of finding simple tandem repeats than RepeatMasker does.
table simpleRepeat
"Describes the Simple Tandem Repeats"
(
ushort bin; "A field to speed indexing"
uint matches; "Number of bases that match that aren't repeats"
uint misMatches; "Number of bases that don't match"
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Simple Repeats tag name"
uint period; "Length of repeat unit"
float copyNum; "Mean number of copies of repeat"
uint consensusSize; "Length of consensus sequence"
uint perMatch; "Percentage Match"
uint perIndel; "Percentage Indel"
uint score; "Alignment score assigned by the trf program"
uint A; "Number of A's in repeat unit"
uint C; "Number of C's in repeat unit"
uint G; "Number of G's in repeat unit"
uint T; "Number of T's in repeat unit"
float entropy; "Entropy"
string sequence; "Sequence of repeat unit element"
)
SNP Map (combined table of SNPs, hg17 and later)
table snp
"Polymorphism data from dbSnp XML files or genotyping arrays"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chrom"
uint chromEnd; "End position in chrom"
string name; "Reference SNP identifier or Affy SNP name"
uint score; "Not used"
char[1] strand; "Which DNA strand contains the observed alleles"
string observed; "The sequences of the observed alleles"
string molType; "Sample type from exemplar ss"
string class; "The class of variant"
string valid; "The validation status of the SNP"
float avHet; "The average heterozygosity from all observations"
float avHetSE; "The Standard Error for the average heterozygosity"
string func; "The functional category of the SNP"
string locType; "How the variant affects the reference sequence"
string source; "Source of the data - dbSnp, Affymetrix, ..."
string exception; "List of exceptionIds for 'invariant' conditions"
)
SNP Exceptions
table snpExceptions
"Set of queries to look for snps that appear problematic"
(
uint exceptionId; "Unique ID for this exception"
string query; "SQL string to retrieve bad records"
uint num; "Count of SNPs that fail this condition"
string description; "Text string for readability"
string resultPath; "Path for results file"
)
SNP Map (combined table of SNPs, hg13-hg16)
table snpMap
"SNP positions from various sources"
(
string chrom; "Chromosome or 'unknown'"
uint chromStart; "Start position in chrom"
uint chromEnd; "End position in chrom"
string name; "Name of SNP - rsId or Affy name"
string source; "BAC_OVERLAP | MIXED | RANDOM | OTHER | Affy10K | Affy120K"
string type; "SNP | INDEL | SEGMENTAL"
)
Softberry Genes - Protein Homologies
table softberryHom
"Protein homologies behind Softberry genes"
(
string name; "Softberry gene name"
string giString; "String with GenBank gi and accession"
lstring description; "Freeform (except for no tabs) description"
)
Standard Linked Features
The phMouse and genscanSubopt tables use the first 7 fields of this table.
The bacEndPairs table uses all fields.
table lfs
"Standard linked features series table"
(
short bin; "Bin number for browser speedup"
string chrom; "Chromosome or FPC contig"
uint chromStart; "Start position of clone in chromosome"
uint chromEnd; "End position of clone in chromosome"
string name; "Name of clone"
uint score; "Score = 1000/(# of times clone appears in assembly)"
char[1] strand; "Value should be + or -"
string pslTable; "Table which contains corresponding PSL records for
linked features"
uint lfCount; "Number of linked features in the series"
uint[lfCount] lfStarts; "Comma-separated list of start positions of each linked
feature in genomic"
uint[lfCount] lfSizes; "Comma-separated list of sizes of each linked feature
in genomic"
string[lfCount] lfNames; "Comma-separated list of names of linked features"
)
STS and FISH Clone Positions
(obsolete as of Apr. 2001, replaced by stsMap)
table stsMarker
"STS marker and its position on golden path and various maps"
(
string chrom; "Chromosome or 'unknown'"
int chromStart; "Start position in chrom (-1 if unpositioned)"
uint chromEnd; "End position in chrom"
string name; "Name of STS marker"
uint score; "Score of a marker - depends on how many contigs it hits"
uint identNo; "Identification number of STS"
string ctgAcc; "Contig accession number"
string otherAcc; "Accession number of other contigs that the marker hits"
string genethonChrom; "Chromosome from Genethon map or 0 if none"
float genethonPos; "Position on Genethon map"
string marshfieldChrom; "Chromosome from Marshfield map or 0 if none"
float marshfieldPos; "Position on Marshfield map"
string gm99Gb4Chrom; "Chromosome from GeneMap99 map or 0 if none"
float gm99Gb4Pos; "Position on GeneMap99 map"
string shgcG3Chrom; "Chromosome from G3 map or 0 if none"
float shgcG3Pos; "Position on G3 map"
string wiYacChrom; "Chromosome from Whitehead YAC map or 0 if none"
float wiYacPos; "Position on Whitehead YAC map"
string shgcTngChrom; "Chromosome from TNG map or 0 if none"
float shgcTngPos; "Position on TNG map"
string fishChrom; "Chromosome from FISH map or 0 if none"
string beginBand; "Beginning of range of bands on FISH map"
string endBand; "End of range of bands on FISH map"
string lab; "Laboratory that placed the FISH clone"
)
STS Clone Positions
table stsMap
"STS marker and its position on golden path and various maps"
(
string chrom; "Chromosome or 'unknown'"
int chromStart; "Start position in chrom - negative 1 if unpositioned"
uint chromEnd; "End position in chrom"
string name; "Name of STS marker"
uint score; "Score of a marker = 1000/(# of placements)"
uint identNo; "Identification number of STS"
string ctgAcc; "Contig accession number"
string otherAcc; "Accession number of other contigs that the marker hits"
string genethonChrom; "Chromosome (no chr) from Genethon map or 0 if none"
float genethonPos; "Position on Genethon map"
string marshfieldChrom; "Chromosome (no chr) from Marshfield map or 0 if none"
float marshfieldPos; "Position on Marshfield map"
string gm99Gb4Chrom; "Chromosome (no chr) from GeneMap99 map or 0 if none"
float gm99Gb4Pos; "Position on gm99_bg4 map"
string shgcTngChrom; "Chromosome (no chr) from shgc_tng map or 0 if none"
float shgcTngPos; "Position on shgc_tng map"
string shgcG3Chrom; "Chromosome (no chr) from Stanford G3 map or 0 if none"
float shgcG3Pos; "Position on shgc_g3 map"
string wiYacChrom; "Chromosome (no chr) from Whitehead YAC map or 0 if none"
float wiYacPos; "Position on wi_yac map"
string wiRhChrom; "Chromosome (no chr) from Whitehead RH map or 0 if none"
float wiRhPos; "Position on wi_rh map"
string fishChrom; "Chromosome (no chr) from FISH map or 0 if none"
string beginBand; "Beginning of range of bands on FISH map"
string endBand; "End of range of bands on FISH map"
string lab; "Laboratory that placed the FISH clone"
)
STS Clone Positions (MGI)
table stsMapMouse
"STS marker and its position on mouse assembly"
(
string chrom; "Chromosome or 'unknown'"
int chromStart; "Start position in chromosome (-1 if unpositioned)"
uint chromEnd; "End position in chromosome"
string name; "Name of STS marker"
uint score; "Score of a marker = 1000/(# of placements)"
uint identNo; "UCSC ID number"
uint probeId; "Probe identification number of STS"
uint markerId; "Marker identification number of STS"
)
STS Clone Positions (NCBI)
table stsMapMouseNew
"STS marker and its position on mouse assembly - mm3 and higher"
(
string chrom; "Chromosome or 'unknown'"
int chromStart; "Start position in chromosome (-1 if unpositioned)"
uint chromEnd; "End position in chromosome"
string name; "Name of STS marker"
uint score; "Score of a marker = 1000/(# of placements)"
uint identNo; "Identification number of STS"
string ctgAcc; "Contig accession number"
string otherAcc; "Accession number of other contigs that the marker hits"
string rhChrom; "Chromosome (no chr) from RH map or 0 if none"
float rhPos; "Position on RH map"
float rhLod; "LOD score of RH map"
string fhhChr; "Chromosome (no chr) from FHHxACI genetic map or 0 if none"
float fhhPos; "Position on FHHxACI map"
string shrspChrom; "Chromosome (no chr) from SHRSPxBN genetic map or 0 if none"
float shrspPos; "Position on SHRSPxBN genetic map"
)
STS Clone Positions (Rat)
table stsMapRat
"STS marker and its position on rat assembly"
(
string chrom; "Chromosome or 'unknown'"
int chromStart; "Start position in chromosome (-1 if unpositioned)"
uint chromEnd; "End position in chromosome"
string name; "Name of STS marker"
uint score; "Score of a marker = 1000/(# of placements)"
uint identNo; "Identification number of STS"
string ctgAcc; "Contig accession number"
string otherAcc; "Accession number of other contigs that the marker hits"
string rhChrom; "Chromosome (no chr) from RH map or 0 if none"
float rhPos; "Position on RH map"
float rhLod; "LOD score of RH map"
string fhhChr; "Chromosome (no chr) from FHHxACI genetic map or 0 if none"
float fhhPos; "Position on FHHxACI map"
string shrspChrom; "Chromosome (no chr) from SHRSPxBN genetic map or 0 if none"
float shrspPos; "Position on SHRSPxBN genetic map"
)
STS Marker Aliases and Associated Identification Numbers
table stsAlias
"STS marker aliases and associated identification numbers"
(
string alias; "STS marker name"
uint identNo; "Identification number of STS marker"
string trueName; "Official UCSC name for marker"
)
STS Marker Constant Information
table stsInfo
"Constant STS marker information"
(
uint identNo; "UCSC identification number"
string name; "Official UCSC name"
uint gbCount; "Number of related GenBank accessions"
string[gbCount] genbank; "Related GenBank accessions"
uint gdbCount; "Number of related GDB identifiers"
string[gdbCount] gdb; "Related GDB identifiers"
uint nameCount; "Number of alias names"
string[nameCount] otherNames; "Alias names"
uint dbSTSid; "ID number in UniSTS or dbSTS"
uint otherDbstsCount; "Number of related dbSTS IDs"
uint[otherDbstsCount] otherDbSTS; "Related dbSTS IDs"
string leftPrimer; "5' primer sequence"
string rightPrimer; "3' primer sequence"
string distance; "Length of STS sequence"
string organism; "Organism for which STS discovered"
uint sequence; "Whether the full sequence is available (1) or not
(0) for STS"
uint otherUCSCcount; "Number of related active UCSC ids"
uint[otherUCSCcount] otherUCSC; "Related active UCSC ids"
uint mergeUCSCcount; "Number of merged inactive UCSC ids"
uint[mergeUCSCcount] mergeUCSC; "Related merged inactive UCSC ids"
string genethonName; "Name in Genethon map"
string genethonChr; "Chromosome in Genethon map"
float genethonPos; "Position in Genethon map"
float genethonLOD; "LOD score in Genethon map"
string marshfieldName; "Name in Marshfield map"
string marshfieldChr; "Chromosome in Marshfield map"
float marshfieldPos; "Position in Marshfield map"
float marshfieldLOD; "LOD score in Marshfield map"
string wiyacName; "Name in WI YAC map"
string wiyacChr; "Chromosome in WI YAC map"
float wiyacPos; "Position in WI YAC map"
float wiyacLOD; "LOD score in WI YAC map"
string wirhName; "Name in WI RH map"
string wirhChr; "Chromosome in WI RH map"
float wirhPos; "Position in WI RH map"
float wirhLOD; "LOD score in WI RH map"
string gm99gb4Name; "Name in GeneMap99 GB4 map"
string gm99gb4Chr; "Chromosome in GeneMap99 GB4 map"
float gm99gb4Pos; "Position in GeneMap99 GB4 map"
float gm99gb4LOD; "LOD score in GeneMap99 GB4 map"
string gm99g3Name; "Name in GeneMap99 G3 map"
string gm99g3Chr; "Chromosome in GeneMap99 G3 map"
float gm99g3Pos; "Position in GeneMap99 G3 map"
float gm99g3LOD; "LOD score in GeneMap99 G3 map"
string tngName; "Name in Stanford TNG map"
string tngChr; "Chromosome in Stanford TNG map"
float tngPos; "Position in Stanford TNG map"
float tngLOD; "LOD score in Stanford TNG map"
)
STS Marker Constant Information (Revised version)
table stsInfo2
"Constant STS marker information - revision"
(
uint identNo; "UCSC identification number"
string name; "Official UCSC name"
uint gbCount; "Number of related GenBank accessions"
string[gbCount] genbank; "Related GenBank accessions"
uint gdbCount; "Number of related GDB identifiers"
string[gdbCount] gdb; "Related GDB identifiers"
uint nameCount; "Number of alias names"
string[nameCount] otherNames; "Alias names"
uint dbSTSid; "ID number in UniSTS or dbSTS"
uint otherDbstsCount; "Number of related dbSTS IDs"
uint[otherDbstsCount] otherDbSTS; "Related dbSTS IDs"
string leftPrimer; "5' primer sequence"
string rightPrimer; "3' primer sequence"
string distance; "Length of STS sequence"
string organism; "Organism for which STS discovered"
uint sequence; "Whether the full sequence is available (1) or not
(0) for STS"
uint otherUCSCcount; "Number of related active UCSC ids"
uint[otherUCSCcount] otherUCSC; "Related active UCSC ids"
uint mergeUCSCcount; "Number of merged inactive UCSC ids"
uint[mergeUCSCcount] mergeUCSC; "Related merged inactive UCSC ids"
string genethonName; "Name in Genethon map"
string genethonChr; "Chromosome in Genethon map"
float genethonPos; "Position in Genethon map"
float genethonLOD; "LOD score in Genethon map"
string marshfieldName; "Name in Marshfield map"
string marshfieldChr; "Chromosome in Marshfield map"
float marshfieldPos; "Position in Marshfield map"
float marshfieldLOD; "LOD score in Marshfield map"
string wiyacName; "Name in WI YAC map"
string wiyacChr; "Chromosome in WI YAC map"
float wiyacPos; "Position in WI YAC map"
float wiyacLOD; "LOD score in WI YAC map"
string wirhName; "Name in WI RH map"
string wirhChr; "Chromosome in WI RH map"
float wirhPos; "Position in WI RH map"
float wirhLOD; "LOD score in WI RH map"
string gm99gb4Name; "Name in GeneMap99 GB4 map"
string gm99gb4Chr; "Chromosome in GeneMap99 GB4 map"
float gm99gb4Pos; "Position in GeneMap99 GB4 map"
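float gm99gb4LOD; "LOD score in GeneMap99 GB4 map"
string gm99g3Name; "Name in GeneMap99 G3 map"
string gm99g3Chr; "Chromosome in GeneMap99 G3 map"
float gm99g3Pos; "Position in GeneMap99 G3 map"
float gm99g3LOD; "LOD score in GeneMap99 G3 map"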
string tngName; "Name in Stanford TNG map"
string tngChr; "Chromosome in Stanford TNG map"
float tngPos; "Position in Stanford TNG map"
float tngLOD; "LOD score in Stanford TNG map"
string decodeName; "Name in deCODE map"
string decodeChr; "Chromosome in deCODE map"
float decodePos; "Position in deCODE map"
float decodeLOD; "LOD score in deCODE map"
)
STS Marker Constant Information (Mouse - mm1, mm2)
table stsInfoMouse
"Constant STS marker information for the mouse genome"
(
uint identNo; "UCSC identification number"
string name; "Official UCSC name"
uint MGIPrimerID; "STS primer's MGI ID, or 0 if N/A"
string primerName; "STS primer's name"
string primerSymbol; "STS primer's symbol"
string primer1; "Primer1 sequence"
string primer2; "Primer2 sequence"
string distance; "Length of STS sequence"
uint sequence; "Whether full sequence is available (1)
or not (0) for STS"
uint MGIMarkerID; "STS marker's MGI ID, or 0 if N/A"
string stsMarkerSymbol; "Symbol of STS marker"
string Chr; "Chromosome in genetic map"
float geneticPos; "Position in genetic map
(-2 if N/A, -1 if syntenic)"
string stsMarkerName; "Name of STS Marker"
uint LocusLinkID; "Locuslink ID, or 0 if N/A"
)
STS Marker Constant Information (Mouse - mm3 and higher)
table stsInfoMouseNew
"Constant STS marker information for the mouse genome - mm3 and higher"
(
uint identNo; "UCSC identification number"
string name; "Official UCSC name"
uint RGDId; "Marker's RGD ID"
string RGDName; "Marker's RGD name"
uint UiStsId; "Marker's UiStsId"
uint nameCount; "Number of aliases"
string alias; "Alias, or N/A"
string primer1; "Primer1 sequence"
string primer2; "Primer2 sequence"
string distance; "Length of STS sequence"
uint sequence; "Whether the full sequence is available (1) or not
(0) for STS"
string organis; "Organism for which STS discovered"
string fhhName; "WI_Mouse_Genetic map"
string fhhChr; "Chromosome in Genetic map"
float fhhGeneticPos; "Position in Genetic map"
string shrspName; "MGD map"
string shrspChr; "Chromosome in Genetic map"
float shrspGeneticPos; "Position in Genetic map"
string rhName; "MRC_RH map"
string rhChr; "Chromosome in Genetic map"
float rhGeneticPos; "Position in Genetic map"
float RHLOD; "LOD score of RH map"
string GeneName; "Associated gene name"
string GeneID; "Associated gene ID"
string clone; "Clone sequence"
)
STS Marker Constant Information (Rat)
table stsInfoRat
"Constant STS marker information for the rat genome"
(
uint identNo; "UCSC identification number"
string name; "Official UCSC name"
uint RGDId; "Marker's RGD ID"
string RGDName; "Marker's RGD name"
uint UiStsId; "Marker's UiStsId"
uint nameCount; "Number of aliases"
string alias; "Alias, or N/A"
string primer1; "Primer1 sequence"
string primer2; "Primer2 sequence"
string distance; "Length of STS sequence"
uint sequence; "Whether the full sequence is available (1) or not
(0) for STS"
string organis; "Organism for which STS discovered"
string fhhName; "WI_Mouse_Genetic map"
string fhhChr; "Chromosome in Genetic map"
float fhhGeneticPos; "Position in Genetic map"
string shrspName; "MGD map"
string shrspChr; "Chromosome in Genetic map"
float shrspGeneticPos; "Position in Genetic map"
string rhName; "MRC_RH map"
string rhChr; "Chromosome in Genetic map"
float rhGeneticPos; "Position in Genetic map"
float RHLOD; "LOD score of RH map"
string GeneName; "Associated gene name"
string GeneID; "Associated gene ID"
string clone; "Clone sequence"
)
Superfamily Assignments
table sfAssign
"Superfamily assignment table"
(
string genomeID; "Genome ID"
string seqID; "Sequence ID"
string modelID; "Model ID"
string region; "Region"
string eValue; "E value"
string sfID; "Superfamily entry ID"
string sfDesc; "Superfamily entry description"
)
Superfamily Description
table sfDes
"Superfamily description table"
(
int id; "ID"
char[2] level; "Level"
string classification; "Classification"
string name; "Name"
string description; "Description"
)
Superfamily Summary Entry Description
table sfDescription
"Stores domain descriptions for the Superfamily track"
(
string name; "Superfamily ID (same as Ensembl transcript name)"
string proteinID; "ID of corresponding Ensembl translation"
string description; "Domain description"
)
UniProtKB IDs Linked to Disease Descriptions
table spDisease
"A cross-reference table between UniProtKB IDs and disease description."
(
string accession; "UniProtKB accession number"
string displayID; "UniProtKB display ID"
lstring diseaseDesc; "Disease description"
)
UniProtKB IDs Linked to InterPro IDs
table swInterPro
"A cross-reference table between UniProtKB accession IDs and InterPro IDs."
(
string accession; "UniProtKB accession number"
string interProId; "InterPro ID"
)
UniProtKB IDs Linked to Organisms
table spOrganism
"A cross-reference table between UniProtKB display IDs and taxonomy numbers."
(
string displayID; "UniProtKB display ID"
string organism; "Taxonomy number"
)
UniProtKB IDs Linked to Other Databases
table spXref2
"A xref table between UniProtKB ids and other databases."
(
string accession; "UniProtKB accession number"
string displayID; "UniProtKB display ID"
string division; "UniProtKB division"
int bioentryID; "Biosql bioentry ID"
int biodatabaseID; "Biosql biodatabase ID"
string description; "Description"
string hugoSymbol; "HUGO Gene Nomenclature Committee gene symbol"
string hugoDesc; "HUGO Gene Nomenclature Committee gene description"
)
UniProtKB IDs Linked to Secondary IDs
table spSecondaryID
"A cross-reference table between UniProtKB accession IDs and secondary accessions."
(
string displayID; "UniProtKB display ID"
string accession; "UniProtKB accession number"
string accession2; "UniProtKB secondary accession number"
)
UniProtKB mRNA
table spMrna
"Associates UniProtKB IDs with mRNA IDs"
(
string spID; "UniProtKB ID"
string mrnaID; "mRNA ID"
)
UniProtKB PSI-BLAST E-value
table spPsiBlast
"PSI-BLAST all-against-all of UniProtKB proteins. UniProtKB accessions
are mapped to Known Genes IDs before being loaded into the table."
(
string kgId1; "Known Gene ID of one protein"
string kgId2; "Known Gene ID of other protein"
float eValue; "Best bi-directional E-value"
)
tBLASTn Protein Track Mappings
table blastRef
"Mapping table for tblastn protein tracks"
(
string acc; "Accession"
string geneId; "Gene name"
string refPos; "Reference position"
string extra1; "Other link 1 (e.g. UniProtKB name)"
string extra2; "Other link 2"
)
Tiling Path of Clones through a Chromosome
table tilingPath
"A tiling path of clones through a chromosome"
(
string chrom; "Chromosome name: chr1, chr2, etc."
string accession; "Clone accession or ? or GAP"
string clone; "Clone name in BAC library"
string contig; "Contig (or gap size)"
int chromIx; "Number of clone in tiling path starting chrom start"
)
Transcription Factor Binding Motif (Harbison, Gordon et al.)
table transRegCodeMotif
"A transcription factor binding motif according to Harbison, Gordon et al."
(
string name; "Motif name."
int columnCount; "Count of columns in motif."
float[columnCount] aProb; "Probability of A's in each column."
float[columnCount] cProb; "Probability of C's in each column."
float[columnCount] gProb; "Probability of G's in each column."
float[columnCount] tProb; "Probability of T's in each column."
)
Transcription Factor Binding Sites
table tfbsCons
"TFBS data"
(
string chrom; "Chromosome"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0-1000"
char[1] strand; "+ or -"
char[6] species; "Common name, scientific name"
char[64] factor; "Factor"
char[10] id; "ID"
)
Transcription Factor Binding Sites and CHIP/CHIP Probe Info
table transRegCodeProbe
"CHIP/CHIP Probe and Transcription Factor Binding Info"
(
string chrom; "Chromosome binding site is on"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of probe"
uint tfCount; "Count of bound transcription factors_conditions"
string[tfCount] tfList; "List of bound transcription factors_conditions"
float[tfCount] bindVals; "E-values for factor binding (lower is better)"
)
Transcription Factor Binding Sites - CHIP/CHIP Experiments
table transRegCode
"Transcription factor binding sites from CHIP/CHIP experiments and conservation"
(
string chrom; "Chromosome binding site is on"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of transcription factor"
uint score; "Score from 0 to 1000"
string chipEvidence; "Evidence strength from CHIP/CHIP assay"
uint consSpecies; "Number of species conserved in"
)
Transcription Factor Binding Sites Conservation Map Data
table tfbsConsMap
"tfbsConsMap Data"
(
string id; "TRANSFAC id"
string ac; "gene-regulation.com AC"
)
Transcription Factor Growth Condition
table transRegCodeCondition
"Growth condition associated with a transcription factor"
(
string name; "Name of transcription factor"
string growthCondition; "Growth condition seen in"
)
Vega Info
table vegaInfo
"Vega Genes track additional information"
(
string transcriptId; "Vega transcript ID"
string otterId; "Otter (Ensembl db) transcript ID"
string geneId; "Vega gene ID"
string method; "GTF method field"
string geneDesc; "Vega gene description"
)
Waba Alignments (Abbreviated version)
table chrN_wabaCbr
"Abbreviated Waba alignment table for quick display"
(
string query; "Name of foreign sequence"
uint chromStart; "Start in genomic sequence"
uint chromEnd; "End in genomic sequence"
char strand; "Relative orientation"
uint milliScore; "Identity in parts per thousand"
longblob squeezed; "HMM symbols with target inserts squeezed out"
)
Waba Alignments (Full description)
table wabaCbr
"Information on a Waba alignment"
(
string query; "Name of foreign sequence"
uint qStart; "Start of alignment in query"
uint qEnd; "End of alignment in query"
char qStrand; "Which strand"
string chrom; "Which chromosome"
uint chromStart; "Start in chromosome sequence"
uint chromEnd; "End in chromosome sequence"
uint milliScore; "Identity in parts per thousand"
uint symCount; "Number of symbols in following blobs"
longblob qSym; "Query sequence and insert chars"
longblob tSym; "Target sequence and insert chars"
longblob hSym; "HMM symbols"
)
|