<!-- 

  PAZAR XML DTD

  Version 1.1

  XML exchange format for the PAZAR database.
  For representing regulatory sequence annotation data.

  The root node is a 'pazar' element

-->

<!-- pazar: one project, then one data, then any number of analysis
     elements, in that order. -->
<!ELEMENT pazar (
    project,
    data,
    analysis*
)>

<!-- ##################### Project Section ################### -->


<!-- project: contains exactly one user child.
     All attributes are required except description. -->
<!ELEMENT project (user)>
<!ATTLIST project
    pazar_id      ID                           #REQUIRED
    project_name  CDATA                        #REQUIRED
    edit_date     CDATA                        #REQUIRED
    status        (RESTRICTED|PUBLISHED|OPEN)  #REQUIRED
    description   CDATA                        #IMPLIED
>

<!-- user: empty element; all data is carried in attributes.
     Only affiliation is optional. -->
<!ELEMENT user EMPTY>
<!ATTLIST user
    pazar_id     ID     #REQUIRED
    first_name   CDATA  #REQUIRED
    last_name    CDATA  #REQUIRED
    username     CDATA  #REQUIRED
    affiliation  CDATA  #IMPLIED
>



<!-- ##################### Data Section ################### -->

<!-- root element of the data section -->

<!-- data: any number of the children listed below, in any order and
     freely repeated (the element may also be empty). -->
<!ELEMENT data (
      homolog
    | gene_source
    | marker
    | construct
    | dataset
    | matrix
    | funct_tf
    | sample
    | cell
    | time
    | bio_condition
    | expression
    | interaction
)*>

<!-- child-elements shared by multiple elements -->

<!-- parameter: empty element holding a tag/value pair; both attributes
     are required. -->
<!ELEMENT parameter EMPTY>
<!ATTLIST parameter
    tag    CDATA  #REQUIRED
    value  CDATA  #REQUIRED
>

<!-- db_source: zero or more parameter children.
     db_name is required; db_subset and assembly are optional. -->
<!ELEMENT db_source (parameter*)>
<!ATTLIST db_source
    db_name    CDATA  #REQUIRED
    db_subset  CDATA  #IMPLIED
    assembly   CDATA  #IMPLIED
>

<!-- coordinate: zero or more parameter children followed by exactly one
     location. All four attributes are required. -->
<!ELEMENT coordinate (
    parameter*,
    location
)>
<!ATTLIST coordinate
    strand  CDATA  #REQUIRED
    begin   CDATA  #REQUIRED
    end     CDATA  #REQUIRED
    length  CDATA  #REQUIRED
>

<!-- location: zero or more parameter children followed by exactly one
     db_source. species and chr are required; band is optional. -->
<!ELEMENT location (
    parameter*,
    db_source
)>
<!ATTLIST location
    species  CDATA  #REQUIRED
    chr      CDATA  #REQUIRED
    band     CDATA  #IMPLIED
>

<!-- method: zero or more parameter children.
     The method attribute is required; description is optional. -->
<!ELEMENT method (parameter*)>
<!ATTLIST method
    method       CDATA  #REQUIRED
    description  CDATA  #IMPLIED
>

<!-- ref: zero or more parameter children; pmid is the only attribute and
     is required. -->
<!ELEMENT ref (parameter*)>
<!ATTLIST ref
    pmid  CDATA  #REQUIRED
>

<!-- Hierarchical linking of elements
Links between these elements are implicit: they are expressed by nesting, as follows:
	<homolog>
		<gene_source>
			<tsr>
				<reg_seq>
					<mutation_set>
						<mutation>
						</mutation>
					</mutation_set>
				</reg_seq>
			</tsr>
			<transcript>
				<tf>
				</tf>
			</transcript>
		</gene_source>
	</homolog>
	<marker>
		<reg_seq>
			<mutation_set>
				<mutation>
				</mutation>
			</mutation_set>
		</reg_seq>
	</marker>
-->

<!-- homolog: zero or more parameter, one db_source, one or more
     gene_source, then zero or more conserved_el, in that order.
     Both attributes are required. -->
<!ELEMENT homolog (
    parameter*,
    db_source,
    gene_source+,
    conserved_el*
)>
<!ATTLIST homolog
    pazar_id       ID                     #REQUIRED
    homology_type  (NA|ORTHOLOG|PARALOG)  #REQUIRED
>

<!-- gene_source: zero or more parameter, one db_source, zero or more tsr,
     then zero or more transcript, in that order.
     pazar_id and db_accn are required; description is optional. -->
<!ELEMENT gene_source (
    parameter*,
    db_source,
    tsr*,
    transcript*
)>
<!ATTLIST gene_source
    pazar_id     ID     #REQUIRED
    db_accn      CDATA  #REQUIRED
    description  CDATA  #IMPLIED
>

<!-- tsr: zero or more parameter, an optional transcript, then one or more
     reg_seq, in that order.
     Only predominant_start is optional. -->
<!ELEMENT tsr (
    parameter*,
    transcript?,
    reg_seq+
)>
<!ATTLIST tsr
    pazar_id           ID     #REQUIRED
    fuzzy_start        CDATA  #REQUIRED
    fuzzy_end          CDATA  #REQUIRED
    predominant_start  CDATA  #IMPLIED
>

<!-- marker: zero or more parameter, one db_source, then one or more
     reg_seq, in that order.
     pazar_id and db_accn are required; description is optional. -->
<!ELEMENT marker (
    parameter*,
    db_source,
    reg_seq+
)>
<!ATTLIST marker
    pazar_id     ID     #REQUIRED
    db_accn      CDATA  #REQUIRED
    description  CDATA  #IMPLIED
>

<!-- reg_seq: zero or more parameter, one coordinate, then zero or more
     mutation_set, in that order.
     Only tfbs_name is optional; quality is restricted to the listed tokens. -->
<!ELEMENT reg_seq (
    parameter*,
    coordinate,
    mutation_set*
)>
<!ATTLIST reg_seq
    pazar_id   ID                               #REQUIRED
    tfbs_name  CDATA                            #IMPLIED
    sequence   CDATA                            #REQUIRED
    quality    (NA|CONSERVED|TESTED|PREDICTED)  #REQUIRED
>

<!-- mutation_set: zero or more parameter, one method, an optional ref,
     then one or more mutation, in that order.
     Only comments is optional. -->
<!ELEMENT mutation_set (
    parameter*,
    method,
    ref?,
    mutation+
)>
<!ATTLIST mutation_set
    pazar_id     ID     #REQUIRED
    mutant_name  CDATA  #REQUIRED
    mutated_seq  CDATA  #REQUIRED
    comments     CDATA  #IMPLIED
>

<!-- mutation: zero or more parameter children.
     All three attributes are required. -->
<!ELEMENT mutation (parameter*)>
<!ATTLIST mutation
    pazar_id  ID     #REQUIRED
    position  CDATA  #REQUIRED
    base      CDATA  #REQUIRED
>

<!-- transcript: zero or more parameter, one db_source, then zero or more
     tf, in that order.
     db_accn is required; isoform and comments are optional.
     No ID-typed attribute is declared for this element. -->
<!ELEMENT transcript (
    parameter*,
    db_source,
    tf*
)>
<!ATTLIST transcript
    db_accn   CDATA  #REQUIRED
    isoform   CDATA  #IMPLIED
    comments  CDATA  #IMPLIED
>

<!-- tf: zero or more parameter children.
     pazar_id is required; class and family are optional. -->
<!ELEMENT tf (parameter*)>
<!ATTLIST tf
    pazar_id  ID     #REQUIRED
    class     CDATA  #IMPLIED
    family    CDATA  #IMPLIED
>

<!-- other elements -->

<!-- construct: zero or more parameter children.
     description and reg_seq_ids are optional; reg_seq_ids is an IDREFS
     list, so each token must match an ID value in the same document. -->
<!ELEMENT construct (parameter*)>
<!ATTLIST construct
    pazar_id        ID      #REQUIRED
    construct_name  CDATA   #REQUIRED
    description     CDATA   #IMPLIED
    sequence        CDATA   #REQUIRED
    reg_seq_ids     IDREFS  #IMPLIED
>

<!-- matrix: zero or more parameter, one db_source, then an optional
     matrix_info, in that order.
     sequence_ids (IDREFS) and description are optional; every other
     attribute, including the four vector* attributes, is required. -->
<!ELEMENT matrix (
    parameter*,
    db_source,
    matrix_info?
)>
<!ATTLIST matrix
    pazar_id      ID      #REQUIRED
    name          CDATA   #REQUIRED
    db_accn       CDATA   #REQUIRED
    vectora       CDATA   #REQUIRED
    vectorc       CDATA   #REQUIRED
    vectorg       CDATA   #REQUIRED
    vectort       CDATA   #REQUIRED
    sequence_ids  IDREFS  #IMPLIED
    description   CDATA   #IMPLIED
>

<!-- matrix_info: zero or more parameter children.
     Only pazar_id is required. -->
<!ELEMENT matrix_info (parameter*)>
<!ATTLIST matrix_info
    pazar_id  ID     #REQUIRED
    species   CDATA  #IMPLIED
    pubmed    CDATA  #IMPLIED
    exptype   CDATA  #IMPLIED
>

<!-- dataset: zero or more parameter children.
     All attributes are required; sequence_ids is an IDREFS list. -->
<!ELEMENT dataset (parameter*)>
<!ATTLIST dataset
    pazar_id      ID      #REQUIRED
    dataset_name  CDATA   #REQUIRED
    sequence_ids  IDREFS  #REQUIRED
>

<!-- conserved_el: zero or more parameter children.
     Both attributes are required; reg_seq_ids is an IDREFS list. -->
<!ELEMENT conserved_el (parameter*)>
<!ATTLIST conserved_el
    pazar_id     ID      #REQUIRED
    reg_seq_ids  IDREFS  #REQUIRED
>

<!-- funct_tf: zero or more parameter, one or more tf_unit, then an
     optional ref, in that order. Both attributes are required. -->
<!ELEMENT funct_tf (
    parameter*,
    tf_unit+,
    ref?
)>
<!ATTLIST funct_tf
    pazar_id       ID     #REQUIRED
    funct_tf_name  CDATA  #REQUIRED
>

<!-- tf_unit: zero or more parameter children.
     tf_id is an IDREF and must match an ID value in the same document;
     modifications is the only optional attribute. -->
<!ELEMENT tf_unit (parameter*)>
<!ATTLIST tf_unit
    pazar_id       ID     #REQUIRED
    tf_id          IDREF  #REQUIRED
    modifications  CDATA  #IMPLIED
>

<!-- sample: zero or more parameter children.
     cell (required) and time (optional) are IDREF attributes, so each
     must match an ID value in the same document. -->
<!ELEMENT sample (parameter*)>
<!ATTLIST sample
    pazar_id     ID     #REQUIRED
    sample_type  CDATA  #REQUIRED
    cell         IDREF  #REQUIRED
    time         IDREF  #IMPLIED
>

<!-- cell: zero or more parameter children.
     pazar_id and species are required; the remaining attributes are
     optional. status, when present, must be one of the listed tokens. -->
<!ELEMENT cell (parameter*)>
<!ATTLIST cell
    pazar_id         ID                       #REQUIRED
    name             CDATA                    #IMPLIED
    tissue_ontology  CDATA                    #IMPLIED
    status           (NA|PRIMARY|CELL__LINE)  #IMPLIED
    description      CDATA                    #IMPLIED
    species          CDATA                    #REQUIRED
>

	
<!-- time: zero or more parameter children.
     pazar_id and scale are required; the remaining attributes are
     optional. -->
<!ELEMENT time (parameter*)>
<!ATTLIST time
    pazar_id     ID     #REQUIRED
    name         CDATA  #IMPLIED
    description  CDATA  #IMPLIED
    range_start  CDATA  #IMPLIED
    range_end    CDATA  #IMPLIED
    scale        CDATA  #REQUIRED
>

<!-- bio_condition: zero or more parameter children.
     Every attribute except description is required. -->
<!ELEMENT bio_condition (parameter*)>
<!ATTLIST bio_condition
    pazar_id       ID     #REQUIRED
    cond_type      CDATA  #REQUIRED
    molecule       CDATA  #REQUIRED
    description    CDATA  #IMPLIED
    concentration  CDATA  #REQUIRED
    scale          CDATA  #REQUIRED
>

<!-- expression: zero or more parameter children.
     Only pazar_id is required. qualitative, when present, must be one of
     the listed tokens. -->
<!ELEMENT expression (parameter*)>
<!ATTLIST expression
    pazar_id      ID     #REQUIRED
    qualitative   (HIGHLY__INDUCED|INDUCED|NO__CHANGE|REPRESSED|STRONGLY__REPRESSED|NA)
                         #IMPLIED
    quantitative  CDATA  #IMPLIED
    scale         CDATA  #IMPLIED
    comments      CDATA  #IMPLIED
>

<!-- interaction: zero or more parameter children.
     Only pazar_id is required. qualitative, when present, must be one of
     the listed tokens. -->
<!ELEMENT interaction (parameter*)>
<!ATTLIST interaction
    pazar_id      ID                                       #REQUIRED
    qualitative   (SATURATION|GOOD|MARGINAL|POOR|NONE|NA)  #IMPLIED
    quantitative  CDATA                                    #IMPLIED
    scale         CDATA                                    #IMPLIED
    comments      CDATA                                    #IMPLIED
>




<!-- ################### Analysis Section ##################-->

<!-- Links between these elements are implicit: they are expressed by nesting, as follows:
	<analysis>
		<method>
		</method>
		<ref>
		</ref>
		<input_output>
			<input>
			</input>
			<output>
			</output>
		</input_output>
	</analysis>
-->

<!-- analysis: zero or more parameter, one evidence, one method, an
     optional ref, then one or more input_output, in that order.
     pazar_id and name are required; cell and time are optional IDREF
     attributes; comments is optional. -->
<!ELEMENT analysis (
    parameter*,
    evidence,
    method,
    ref?,
    input_output+
)>
<!ATTLIST analysis
    pazar_id  ID     #REQUIRED
    name      CDATA  #REQUIRED
    cell      IDREF  #IMPLIED
    time      IDREF  #IMPLIED
    comments  CDATA  #IMPLIED
>

<!-- evidence: zero or more parameter children.
     Both attributes are required and restricted to the listed tokens. -->
<!ELEMENT evidence (parameter*)>
<!ATTLIST evidence
    type_evid    (CURATED|ADMC|PREDICTION)                    #REQUIRED
    status_evid  (APPROVED|PROVISIONAL|ARCHIVABLE|REMOVABLE)  #REQUIRED
>

<!-- input_output: one input, one output, then zero or more parameter
     children, in that order. Note that parameter comes last here. -->
<!ELEMENT input_output (
    input,
    output,
    parameter*
)>

<!-- input: empty element; inputs is a required IDREFS list, so each token
     must match an ID value in the same document. -->
<!ELEMENT input EMPTY>
<!ATTLIST input
    inputs  IDREFS  #REQUIRED
>

<!-- output: empty element; outputs is a required IDREFS list, so each
     token must match an ID value in the same document. -->
<!ELEMENT output EMPTY>
<!ATTLIST output
    outputs  IDREFS  #REQUIRED
>

