Appendix: Distance Metrics

DTD
<!--<?xml encoding="UTF-8" ?> -->

<!-- TM4ML: may contain, in this order, at most one each of midas,
     dbi_controller and mev; all three children are optional.
     The version attribute is mandatory.
     NOTE(review): presumably the document root, since no other content
     model visible here references it. Confirm. -->
<!ELEMENT TM4ML (midas?, dbi_controller?, mev?)>
<!ATTLIST TM4ML
    version CDATA #REQUIRED>

<!-- Placeholders for the midas and dbi_controller elements: both are
     declared EMPTY and have no attribute list in this DTD. -->
<!ELEMENT midas EMPTY>
<!ELEMENT dbi_controller EMPTY>


<!-- mev: exactly one primary_data followed by exactly one analysis.
     The version attribute is mandatory. -->
<!ELEMENT mev (primary_data, analysis)>
<!ATTLIST mev
    version CDATA #REQUIRED>

<!-- analysis: one or more alg_set children; it has no attributes. -->
<!ELEMENT analysis (alg_set+)>

<!-- alg_set: a sequence of zero or more algorithm elements.
     set_id and input_data_ref are both mandatory. They are typed as plain
     CDATA, so the DTD itself enforces neither uniqueness of set_id nor that
     input_data_ref points at an existing identifier. -->
<!ELEMENT alg_set (algorithm*)>
<!ATTLIST alg_set
    set_id         CDATA #REQUIRED
    input_data_ref CDATA #REQUIRED>

<!-- algorithm: a plist, then an optional mlist, then an output_data.
     All four attributes are mandatory. alg_type is restricted to the
     enumerated tokens; the other three are free-form CDATA. -->
<!ELEMENT algorithm (plist, mlist?, output_data)>
<!ATTLIST algorithm
    alg_id         CDATA #REQUIRED
    input_data_ref CDATA #REQUIRED
    alg_name       CDATA #REQUIRED
    alg_type       ( cluster
                   | cluster-genes
                   | cluster-experiments
                   | data-visualization
                   | data-adjustment
                   | cluster-selection
                   | data-normalization ) #REQUIRED>

<!-- plist: zero or more param children; it has no attributes. -->
<!ELEMENT plist (param*)>

<!-- param: an empty element carrying one key/value pair.
     Both attributes are mandatory CDATA. -->
<!ELEMENT param EMPTY>
<!ATTLIST param
    key   CDATA #REQUIRED
    value CDATA #REQUIRED>
                
<!-- mlist: zero or more matrix children; it has no attributes. -->
<!ELEMENT mlist (matrix*)>
<!-- matrix: one or more element children.
     All four attributes are mandatory. type must be one of the enumerated
     tokens; row_dim and col_dim are plain CDATA, so the DTD does not check
     that they are numeric or that they match the element children. -->
<!ELEMENT matrix (element+)>
<!ATTLIST matrix
    name    CDATA #REQUIRED
    type    ( int-array | string-array | FloatMatrix ) #REQUIRED
    row_dim CDATA #REQUIRED
    col_dim CDATA #REQUIRED>

<!-- element: an empty element with three mandatory CDATA attributes:
     row, col and value.
     NOTE(review): presumably one matrix cell addressed by row and col;
     confirm the index base against the consuming code. -->
<!ELEMENT element EMPTY>
<!ATTLIST element
    row   CDATA #REQUIRED
    col   CDATA #REQUIRED
    value CDATA #REQUIRED>

<!-- output_data: one or more data_node children.
     output_class is mandatory and must be one of the enumerated tokens.
     The correctly spelled multi-experiment-cluster-output is accepted in
     addition to the historical misspelling multi-expteriment-cluster-output,
     which stays in the list so that documents already using it still
     validate.
     NOTE(review): confirm which spelling the consuming application reads
     and writes before retiring the misspelled token. -->
<!ELEMENT output_data (data_node+)>
<!ATTLIST output_data
    output_class ( single-output
                 | multi-cluster-output
                 | multi-gene-cluster-output
                 | multi-experiment-cluster-output
                 | multi-expteriment-cluster-output
                 | partition-output ) #REQUIRED>

<!-- single-output indicates that the result is one set
     (usually the result of normalization, filtering, or transformation).

     multi-cluster-output is produced by many clustering algorithms and
     represents multiple clusters in which each cluster contains vectors
     that are similar.  There is no clear ordering of results.
     Generally, to act on this output, a selection algorithm should be
     used to select a cluster.

     partition-output is a multi-cluster output where the clusters are
     ordered and cluster members have a particular shared quality,
     e.g. significant genes found by a statistical algorithm, or elements
     partitioned by classification algorithms. -->
                  
<!-- data_node: an empty element with two mandatory CDATA attributes,
     data_node_id and name. -->
<!ELEMENT data_node EMPTY>
<!ATTLIST data_node
    data_node_id CDATA #REQUIRED
    name         CDATA #REQUIRED>
                                        
<!-- primary_data: an optional file_list child.
     id is mandatory. data_type is optional (#IMPLIED) and, when present,
     must be one of the enumerated tokens.
     An earlier note asked for an enumeration of data types
     (mev|tav|stanford|affy|gpr); the enumeration now exists and lists
     affy_abs, affy_ref and affy_mean rather than a single affy value. -->
<!ELEMENT primary_data (file_list?)>
<!ATTLIST primary_data
    id        CDATA #REQUIRED
    data_type ( mev
              | tav
              | stanford
              | gpr
              | affy_abs
              | affy_ref
              | affy_mean ) #IMPLIED>
<!-- file_list: one or more file children; it has no attributes. -->
<!ELEMENT file_list (file+)>
<!-- file: an empty element describing one file.
     file_path is mandatory CDATA; file_type is mandatory and must be one of
     data, annot or preference. -->
<!ELEMENT file EMPTY>
<!ATTLIST file
    file_path CDATA #REQUIRED
    file_type ( data | annot | preference ) #REQUIRED>

DTD UML Schema