<?xml version = "1.0" encoding = "UTF-8"?>
<serviceDescriptions  xmlns="pd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="pd /C:/Documents and Settings/ytanoh/My Documents/taverna-1.4-old/plugins/pedro/services/model/common.xsd">
	<serviceDescription >
		<serviceName>compseq</serviceName>
		<organisation >
			<organisationName>European Bioinformatics Institute</organisationName>
		</organisation>

		<locationURL>http://www.ebi.ac.uk/soaplab/emboss4/services/nucleic_composition.compseq</locationURL>
		<interfaceWSDL>http://www.ebi.ac.uk/soaplab/emboss4/services/nucleic_composition.compseq?wsdl</interfaceWSDL>
		<serviceDescriptionText>Count composition of dimer/trimer/etc words in a sequence. Detailed info about this operation can be found at the following link: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/compseq.html</serviceDescriptionText>
		<operations >
			<serviceOperation >
				<operationName>compseq</operationName>
				<operationDescriptionText>Count composition of dimer/trimer/etc words in a sequence. Detailed info about this operation can be found at the following link: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/compseq.html</operationDescriptionText>
				<operationInputs >
					<parameter >
						<parameterName>sequence_usa</parameterName>
						<parameterDescription>Type: string. Any valid sequence.  The Uniform Sequence Address, or USA, is a standard way of specifying a sequence to be read into a program in EMBOSS. The most common way of specifying a sequence is to type (database:entry), where database can be embl, uniprot or swissprot and entry is either the sequence&apos;s entry or ID name, or its Accession number in that database. For example, database:accession --&gt;embl:X65923 or swissprot:Q7M4G0 | database:entry_name --&gt;swissprot:AMIC_PSEAE | database:ID_name --&gt;embl:paamir. Choose either this parameter or the sequence_direct_data parameter but not both together. Parameter mandatory if the sequence_direct_data parameter has not been chosen.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sequence_direct_data</parameterName>
						<parameterDescription>Type: string. Any valid sequence.  Choose this parameter if you want to provide the &apos;sequence&apos; value (protein or DNA sequence) as string. Parameter mandatory if the sequence_usa parameter hasn&apos;t been chosen.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<semanticType>http://www.mygrid.org.uk/ontology#biological_sequence</semanticType>
						<parameterFormat>http://www.mygrid.org.uk/ontology#single_sequence_format</parameterFormat>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sformat</parameterName>
						<parameterDescription>Type: string. Specifies the format of the input sequence. This is an optional parameter. The allowed values for this parameter are: gcg, gcg8, embl, swiss, fasta, ncbi, genbank, nbrf, pir, codata, strider, clustal, phylip, acedb, msf, jackknifer, jackknifernon, nexus, nexusnon, treecon, mega, meganon, ig, staden, text, raw. By default the service autodetects the sequence format.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sbegin</parameterName>
						<parameterDescription>Type: long. The first position to be used in the sequence, for example: sbegin=1 (default value) means start with the first base in the sequence. This is an optional parameter.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>send</parameterName>
						<parameterDescription>Type: long. The last position to be used in the sequence. This is an optional parameter. By default it is the end of the sequence.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sprotein</parameterName>
						<parameterDescription>Type: boolean. Is the sequence a protein? This is an optional parameter. By default the service can autodetect if a sequence is a nucleotide or protein sequence.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>snucleotide</parameterName>
						<parameterDescription>Type: boolean. Is the sequence a nucleotide? This is an optional parameter. By default the service can autodetect if a sequence is a nucleotide or protein sequence.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>sreverse</parameterName>
						<parameterDescription>Type: boolean. Use reverse complement of the nucleic acid sequence. This is an optional parameter. False by default.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>slower</parameterName>
						<parameterDescription>Type: boolean. Convert the sequence to lower case. This is an optional parameter. False by default.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>supper</parameterName>
						<parameterDescription>Type: boolean. Convert the sequence to UPPER case. This is an optional parameter. False by default.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>word</parameterName>
						<parameterDescription>Type: long. This is the size of word (n-mer) to count. Thus if you want to count codon frequencies, you should enter 3 here. This is a mandatory parameter. The default is 2. </parameterDescription>
						<defaultValue>2</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>infile_direct_data</parameterName>
						<parameterDescription>Type: string. This is a file previously produced by &apos;compseq&apos; that can be used to set the expected frequencies of words in this analysis. The word size in the current run must be the same as the one in this results file. Obviously, you should use a file produced from protein sequences if you are counting protein sequence word frequencies, and you must use one made from nucleotide frequencies if you are analysing a nucleotide sequence. This is an optional parameter.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>infile_url</parameterName>
						<parameterDescription>Type: string. URL of a file previously produced by &apos;compseq&apos; that can be used to set the expected frequencies of words in this analysis. The word size in the current run must be the same as the one in this results file. Obviously, you should use a file produced from protein sequences if you are counting protein sequence word frequencies, and you must use one made from nucleotide frequencies if you are analysing a nucleotide sequence. This is an optional parameter. &apos;infile_url&apos; and &apos;infile_direct_data&apos; parameters are mutually exclusive.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>frame</parameterName>
						<parameterDescription>Type: long. The normal behaviour of &apos;compseq&apos; is to count the frequencies of all words that occur by moving a window of length &apos;word&apos; up by one each time. This option allows you to move the window up by the length of the word each time, skipping over the intervening words. You can count only those words that occur in a single frame of the word by setting this value to a number other than zero. If you set it to 1 it will only count the words in frame 1, 2 will only count the words in frame 2 and so on. This is an optional parameter. The default value is 0.</parameterDescription>
						<defaultValue>0</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>ignorebz</parameterName>
						<parameterDescription>Type: boolean(Yes/No). The amino acid code B represents Asparagine or Aspartic acid and the code Z represents Glutamine or Glutamic acid. These are not commonly used codes and you may wish not to count words containing them, just noting them in the count of &apos;Other&apos; words. This is an optional parameter. The default is &apos;Yes&apos;. </parameterDescription>
						<defaultValue>true</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>reverse</parameterName>
						<parameterDescription>Type: boolean(Yes/No). Set this to be true if you also wish to count words in the reverse complement of a nucleic sequence. This is an optional parameter. The default is &apos;No&apos;.</parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>calcfreq</parameterName>
						<parameterDescription>Type: boolean(Yes/No). If this is set true then the expected frequencies of words are calculated from the observed frequency of single bases or residues in the sequences. If you are reporting a word size of 1 (single bases or residues) then there is no point in using this option because the calculated expected frequency will be equal to the observed frequency. Calculating the expected frequencies like this will give an approximation of the expected frequencies that you might get by using an input file of frequencies produced by a previous run of this program. If an input file of expected word frequencies has been specified then the values from that file will be used instead of this calculation of expected frequency from the sequence, even if &apos;calcfreq&apos; is set to be true. This is an optional parameter. The default is &apos;No&apos;. </parameterDescription>
						<defaultValue>false</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>zerocount</parameterName>
						<parameterDescription>Type: boolean (Yes/No). You can make the output results file much smaller if you do not display the words with a zero count. This is an optional parameter. The default is &apos;Yes&apos;. </parameterDescription>
						<defaultValue>true</defaultValue>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

				</operationInputs>

				<operationOutputs >
					<parameter >
						<parameterName>report</parameterName>
						<parameterDescription>Type: string. A general report for the underlying analysis job.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>detailed_status</parameterName>
						<parameterDescription>Type: long. The exit code of the underlying analysis job, 0 means the job returned normally.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

					<parameter >
						<parameterName>outfile</parameterName>
						<parameterDescription>Returns the frequency of a given word length.</parameterDescription>
						<isConfigurationParameter>false</isConfigurationParameter>
						<transportDataType>String</transportDataType>
					</parameter>

				</operationOutputs>

				<operationTask>http://www.mygrid.org.uk/ontology#calculating</operationTask>
				<operationResource>http://www.mygrid.org.uk/ontology#EMBL_nucleotide_sequence_database</operationResource>
			</serviceOperation>

		</operations>

		<serviceType>Soaplab service</serviceType>
	</serviceDescription>

</serviceDescriptions>
