<?xml version = "1.0" encoding = "UTF-8"?>
<serviceDescriptions  xmlns="pd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="pd /C:/Documents and Settings/ytanoh/My Documents/taverna-1.4-old/plugins/pedro/services/model/common.xsd">
	<serviceDescription >
		<serviceName>ehmmbuild</serviceName>
		<organisation >
			<organisationName>European Bioinformatics Institute</organisationName>
		</organisation>

		<locationURL>http://www.ebi.ac.uk/soaplab/emboss4/services/hmm.ehmmbuild</locationURL>
		<interfaceWSDL>http://www.ebi.ac.uk/soaplab/emboss4/services/hmm.ehmmbuild?wsdl</interfaceWSDL>
		<serviceDescriptionText>Build a profile HMM from an alignment. Detailed info about this operation can be found at the following links: http://emboss.sourceforge.net/apps/release/4.1/embassy/hmmernew/ehmmbuild.html and http://www.csb.yale.edu/userguides/seq/hmmer/docs/</serviceDescriptionText>
		<operations >
			<serviceOperation >
				<operationName>ehmmbuild</operationName>
				<operationDescriptionText>Build a profile HMM from an alignment. Detailed info about this operation can be found at the following links: http://emboss.sourceforge.net/apps/release/4.1/embassy/hmmernew/ehmmbuild.html and http://www.csb.yale.edu/userguides/seq/hmmer/docs/</operationDescriptionText>
				<operationInputs >
					<parameter >
						<parameterName>alignfile_usa</parameterName>
						<parameterDescription>Type: string. Set of reference (USA). The Uniform Sequence Address, or USA, is a standard way of specifying a sequence to be read into a program in EMBOSS. The most common way of specifying a sequence is to type (database:entry), where database can be embl, uniprot or swissprot and entry is either the sequence&apos;s entry or ID name, or its Accession number in that database. For example, database:accession --&gt;embl:X65923 or swissprot:Q7M4G0 | database:entry_name --&gt;swissprot:AMIC_PSEAE | database:ID_name --&gt;embl:paamir. Choose either this parameter or the alignfile_direct_data parameter but not both together. Parameter mandatory if the alignfile_direct_data parameter hasn&apos;t been chosen.</parameterDescription>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>alignfile_direct_data</parameterName>
						<parameterDescription>Type: string. Readable set of protein sequences or aligned sequences. Choose this parameter if you want to provide the &apos;alignfile&apos; value as a string. Parameter mandatory if the alignfile_usa parameter hasn&apos;t been chosen.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<semanticType>http://www.mygrid.org.uk/ontology#protein_sequence</semanticType>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sformat</parameterName>
						<parameterDescription>Type: string. Specifies the format of the input sequence. This is an optional parameter. The allowed values for this parameter are: gcg, gcg8, embl, swiss, fasta, ncbi, genbank, nbrf, pir, codata, strider, clustal, phylip, acedb, msf, jackknifer, jackknifernon, nexus, nexusnon, treecon, mega, meganon, ig, staden, text, raw. By default the program autodetects the sequence format. </parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sbegin</parameterName>
						<parameterDescription>Type: long. The first position to be used in the sequence, for example: sbegin=1 (default value) means start with the first base in the sequence. This is an optional parameter.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>send</parameterName>
						<parameterDescription>Type: long. The last position to be used in the sequence. This is an optional parameter. By default it is the end of the sequence.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sprotein</parameterName>
						<parameterDescription>Type: boolean. Is the sequence a protein? This is an optional parameter. By default the program autodetects if a sequence is a nucleotide or protein sequence.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>snucleotide</parameterName>
						<parameterDescription>Type: boolean. Is the sequence a nucleotide? This is an optional parameter. By default the program autodetects if a sequence is a nucleotide or protein sequence.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sreverse</parameterName>
						<parameterDescription>Type: boolean. Use reverse complement of the nucleic acid sequence. This is an optional parameter. False by default.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>slower</parameterName>
						<parameterDescription>Type: boolean. Convert the sequence to lower case. This is an optional parameter. False by default.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>supper</parameterName>
						<parameterDescription>Type: boolean. Convert the sequence to UPPER case. This is an optional parameter. False by default.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>nhmm</parameterName>
						<parameterDescription>Type: string. Name for this HMM profile. The name can be any string of non-whitespace characters (e.g. one &apos;word&apos;). There is no length limit (at least not one imposed by HMMER). This is an optional parameter.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>strategy</parameterName>
						<parameterDescription>Type: string. All alignments are local with respect to the sequence and are configured to be local (fragmentary) or global with respect to the HMM. The model is also configured to find a single or multiple domains (matches) to a sequence. The options for configuring the model are as follows: (D): The default setting. Multiple domains per sequence, global alignments with respect to the HMM. | (F): Multiple domains per sequence, local alignments with respect to the HMM. | (G): Single domain per sequence, global alignment with respect to the HMM. | (S): Single domain per sequence, local alignments with respect to the HMM. This is an optional parameter. The default value is &apos;D&apos;.</parameterDescription>
						<defaultValue>D</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>prior_direct_data</parameterName>
						<parameterDescription>Type: string. Read a Dirichlet prior, replacing the default mixture Dirichlet. The format of prior files is documented in the User&apos;s Guide, http://www.csb.yale.edu/userguides/seq/hmmer/docs/node1.html. &apos;prior_direct_data&apos; parameter and &apos;prior_url&apos; are mutually exclusive. This is a mandatory parameter if the &apos;prior_url&apos; is not selected.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>prior_url</parameterName>
						<parameterDescription>Type: string. URL of the Dirichlet prior, replacing the default mixture Dirichlet. The format of prior files is documented in the User&apos;s Guide, http://www.csb.yale.edu/userguides/seq/hmmer/docs/node1.html. &apos;prior_direct_data&apos; parameter and &apos;prior_url&apos; are mutually exclusive. This is a mandatory parameter if the &apos;prior_direct_data&apos; is not selected.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>null_direct_data</parameterName>
						<parameterDescription>Type: string. Read a null model. The default for protein is to use average amino acid frequencies from Swissprot 34 and p1 = 350/351; for nucleic acid, the default is to use 0.25 for each base and p1 = 1000/1001. For documentation of the format of the null model file and further explanation of how the null model is used, see the User&apos;s Guide: http://www.csb.yale.edu/userguides/seq/hmmer/docs/node1.html. &apos;null_direct_data&apos; parameter and &apos;null_url&apos; are mutually exclusive. This is a mandatory parameter if &apos;null_url&apos; parameter is not selected.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>null_url</parameterName>
						<parameterDescription>Type: string. Read URL of the null model. The default for protein is to use average amino acid frequencies from Swissprot 34 and p1 = 350/351; for nucleic acid, the default is to use 0.25 for each base and p1 = 1000/1001. For documentation of the format of the null model file and further explanation of how the null model is used, see the User&apos;s Guide: http://www.csb.yale.edu/userguides/seq/hmmer/docs/node1.html. &apos;null_direct_data&apos; parameter and &apos;null_url&apos; are mutually exclusive. This is a mandatory parameter if &apos;null_direct_data&apos; parameter is not selected.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>pam_direct_data</parameterName>
						<parameterDescription>Type: string. HMMER matrix. Apply a heuristic PAM (substitution matrix) based prior on match emission probabilities instead of the default mixture Dirichlet. The substitution matrix is read from file. The default Dirichlet state transition prior and insert emission prior are unaffected. &apos;pam_direct_data&apos; parameter and &apos;pam_url&apos; are mutually exclusive. This is a mandatory parameter if the &apos;pam_url&apos; parameter is not selected.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>pam_url</parameterName>
						<parameterDescription>Type: string. URL of HMMER matrix. Apply a heuristic PAM- (substitution matrix-) based prior on match emission probabilities instead of the default mixture Dirichlet. The substitution matrix is read from file. The default Dirichlet state transition prior and insert emission prior are unaffected. &apos;pam_direct_data&apos; parameter and &apos;pam_url&apos; are mutually exclusive. This is a mandatory parameter if the &apos;pam_direct_data&apos; parameter is not selected.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>pamwgt</parameterName>
						<parameterDescription>Type: float. Controls the weight &lt;x&gt; on a PAM-based prior. Only has effect if &apos;pam&apos; parameter is also in use. &lt;x&gt; is a positive real number, 20.0 by default. &lt;x&gt; is the number of &apos;pseudocounts&apos; contributed by the heuristic prior. Very high values of &lt;x&gt; can force a scoring system that is entirely driven by the substitution matrix, making HMMER somewhat approximate Gribskov profiles. This is an optional parameter. The default value is 20.0.</parameterDescription>
						<defaultValue>20.0</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>pbswitch</parameterName>
						<parameterDescription>Type: long. For alignments with a very large number of sequences, the GSC, BLOSUM, and Voronoi weighting schemes are slow. Henikoff position-based weights (PB weights) are more efficient. At or above a certain threshold sequence number &lt;n&gt; hmmbuild will switch from GSC, BLOSUM, or Voronoi weights to PB weights. To disable this switching behavior (at the cost of compute time), set &lt;n&gt; to be something larger than the number of sequences in your alignment. &lt;n&gt; is a positive integer; the default value is 1000. This is an optional parameter.</parameterDescription>
						<defaultValue>1000</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>archpri</parameterName>
						<parameterDescription>Type: float. The value of the &apos;architecture prior&apos; used by MAP architecture construction. This value is a probability between 0 and 1. This parameter governs a geometric prior distribution over model lengths. As &apos;archpri&apos; increases, longer models are favored a priori. As &apos;archpri&apos; decreases, it takes more residue conservation in a column to make a column a &apos;consensus&apos; match column in the model architecture. The default value is 0.85. This is an optional parameter.</parameterDescription>
						<defaultValue>0.85</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>binary</parameterName>
						<parameterDescription>Type: boolean. Write the HMM to file in HMMER binary format instead of readable ASCII text. This is an optional parameter. The default value is &apos;false&apos;.</parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>fast</parameterName>
						<parameterDescription>Type: boolean. Quickly and heuristically determine the architecture of the model by assigning all columns with more than a certain fraction of gap characters to insert states. By default this fraction is 0.5, and it can be changed using the &apos;gapmax&apos; parameter. The default construction algorithm is a maximum a posteriori (MAP) algorithm, which is slower. This is an optional parameter. The default value is &apos;false&apos;.</parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>gapmax</parameterName>
						<parameterDescription>Type: float. Controls the &apos;fast&apos; parameter model construction algorithm, but if &apos;fast&apos; parameter is not being used, has no effect. If a column has more than a fraction &lt;x&gt; of gap symbols in it, it gets assigned to an insert column. &lt;x&gt; is a frequency from 0 to 1, and by default is set to 0.5. Higher values of &lt;x&gt; mean more columns get assigned to consensus, and models get longer; smaller values of &lt;x&gt; mean fewer columns get assigned to consensus, and models get smaller. This is an optional parameter. The default value is 0.5.</parameterDescription>
						<defaultValue>0.5</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>hand</parameterName>
						<parameterDescription>Type: boolean. Specify the architecture of the model by hand: the alignment file must be in SELEX or Stockholm format, and the reference annotation line (RF in SELEX, GC RF in Stockholm) is used to specify the architecture. Any column marked with a non-gap symbol (such as an &apos;x&apos;, for instance) is assigned as a consensus (match) column in the model. This is an optional parameter. The default value is &apos;false&apos;.</parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sidlevel</parameterName>
						<parameterDescription>Type: float. Controls the determination of effective sequence number. The sequence alignment is clustered by percent identity, and the number of clusters at a cutoff threshold of &lt;x&gt; is used to determine the effective sequence number. Higher values of &lt;x&gt; give more clusters and higher effective sequence numbers; lower values of &lt;x&gt; give fewer clusters and lower effective sequence numbers. &lt;x&gt; is a fraction from 0 to 1, and by default is set to 0.62 (corresponding to the clustering level used in constructing the BLOSUM62 substitution matrix). This is an optional parameter. The default value is 0.62.</parameterDescription>
						<defaultValue>0.62</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>noeff</parameterName>
						<parameterDescription>Type: boolean. Turn off the effective sequence number calculation, and use the true number of sequences instead. This will usually reduce the sensitivity of the final model (so don&apos;t do it without good reason!). This is an optional parameter. The default value is &apos;false&apos;.</parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>swentry</parameterName>
						<parameterDescription>Type: float. Controls the total probability that is distributed to local entries into the model, versus starting at the beginning of the model as in a global alignment. &lt;x&gt; is a probability from 0 to 1, and by default is set to 0.5. Higher values of &lt;x&gt; mean that hits that are fragments on their left (N or 5&apos;-terminal) side will be penalized less, but complete global alignments will be penalized more. Lower values of &lt;x&gt; mean that fragments on the left will be penalized more, and global alignments on this side will be favored. You have independent control over local/global alignment behavior for the N/C (5&apos;/3&apos;) termini of your target sequences using &apos;swentry&apos; and &apos;swexit&apos; parameters. This is an optional parameter. The default value is 0.5.</parameterDescription>
						<defaultValue>0.5</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>swexit</parameterName>
						<parameterDescription>Type: float. Controls the total probability that is distributed to local exits from the model, versus ending an alignment at the end of the model as in a global alignment. &lt;x&gt; is a probability from 0 to 1, and by default is set to 0.5. Higher values of &lt;x&gt; mean that hits that are fragments on their right (C or 3&apos;-terminal) side will be penalized less, but complete global alignments will be penalized more. Lower values of &lt;x&gt; mean that fragments on the right will be penalized more, and global alignments on this side will be favored. You have independent control over local/global alignment behavior for the N/C (5&apos;/3&apos;) termini of your target sequences using &apos;swentry&apos; and &apos;swexit&apos; parameters. This is an optional parameter. The default value is 0.5.</parameterDescription>
						<defaultValue>0.5</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>verbosity</parameterName>
						<parameterDescription>Type: boolean. Print more possibly useful stuff, such as the individual scores for each sequence in the alignment. This is an optional parameter. The default value is &apos;false&apos;.</parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>weighting</parameterName>
						<parameterDescription>Type: string. Values (B) (-wblosum in HMMER) Use the BLOSUM filtering algorithm to weight the sequences. Cluster the sequences at a given percentage identity; assign each cluster a total weight of 1.0, distributed equally amongst the members of that cluster. (G) (-wgsc in HMMER) Use the Gerstein/Sonnhammer/Chothia ad hoc sequence weighting algorithm. This is the default. (K) (-wme in HMMER) Use the Krogh/Mitchison maximum entropy algorithm to &apos;weight&apos; the sequences. This supersedes the Eddy/Mitchison/Durbin maximum discrimination algorithm, which gives almost identical weights but is less robust. ME (Maximum Entropy) weighting seems to give a marginal increase in sensitivity over the default GSC weights, but takes a fair amount of time. (W) (-wpb in HMMER) Use the Henikoff position-based weighting scheme. (V) (-wvoronoi in HMMER) Use the Sibbald/Argos Voronoi sequence weighting algorithm in place of the default GSC weighting. (N) (-wnone in HMMER) Turn off all sequence weighting. This is an optional parameter. The allowed values for this parameter are: B, G, K, W, V, N. The default value is G.</parameterDescription>
						<defaultValue>G</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

				</operationInputs>

				<operationOutputs >
					<parameter >
						<parameterName>report</parameterName>
						<parameterDescription>Type: string. A general report for the underlying analysis job.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>detailed_status</parameterName>
						<parameterDescription>Type: long. The exit code of the underlying analysis job, 0 means the job returned normally.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>hmmfile</parameterName>
						<parameterDescription>Returns file containing a HMM profile. </parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>o</parameterName>
						<parameterDescription>Re-saves the starting alignment to file, in Stockholm format. The columns which were assigned to match states will be marked with x&apos;s in an RF annotation line. If either the &apos;hand&apos; or &apos;fast&apos; parameters options were chosen, the alignment may have been slightly altered to be compatible with Plan 7 transitions, so saving the final alignment and comparing to the starting alignment can let you view these alterations. See the User&apos;s Guide for more information on this arcane side effect. </parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>cfile</parameterName>
						<parameterDescription>Saves the observed emission and transition counts to file after the architecture has been determined (e.g. after residues/gaps have been assigned to match, delete, and insert states). This option is used in HMMER development for generating data files useful for training new Dirichlet priors. The format of count files is documented in the User&apos;s Guide. </parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

				</operationOutputs>

			</serviceOperation>

		</operations>

		<serviceType>Soaplab service</serviceType>
	</serviceDescription>

</serviceDescriptions>
