001 /*
002 * Factor.java
003 *
004 * Copyright 2003 Sergio Anibal de Carvalho Junior
005 *
006 * This file is part of NeoBio.
007 *
008 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
009 * the GNU General Public License as published by the Free Software Foundation; either
010 * version 2 of the License, or (at your option) any later version.
011 *
012 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
013 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
014 * PURPOSE. See the GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License along with NeoBio;
017 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018 * Boston, MA 02111-1307, USA.
019 *
020 * Proper attribution of the author as the source of the software would be appreciated.
021 *
022 * Sergio Anibal de Carvalho Junior mailto:sergioanibaljr@users.sourceforge.net
023 * Department of Computer Science http://www.dcs.kcl.ac.uk
024 * King's College London, UK http://www.kcl.ac.uk
025 *
026 * Please visit http://neobio.sourceforge.net
027 *
028 * This project was supervised by Professor Maxime Crochemore.
029 *
030 */
031
032 package neobio.alignment;
033
034 /**
035 * This class is used by {@linkplain FactorSequence} to create a linked list of factors of
036 * a text as induced by its Lempel-Ziv (LZ78) factorisation.
037 *
038 * <P>Each instance of this class represent a string composed of its an ancestor factor's
039 * string plus one character, and contains:
040 *
041 * <UL>
042 * <LI>a pointer to its ancestor factor (the longest factor previously seen in the text
043 * during its LZ78 factorisation);
044 * <LI>the new character;
045 * <LI>a serial number (which represents its order in the text)
046 * <LI>a pointer to the next factor of the text
047 * <LI>its length (number of characters, which is equal to its ancestor's length plus one)
048 * </UL>
049 *
050 * @author Sergio A. de Carvalho Jr.
051 * @see FactorSequence
052 */
053 public class Factor
054 {
055 /**
056 * A pointer to this factor's ancestor, which represents a prefix of this factor's
057 * text.
058 */
059 protected Factor ancestor;
060
061 /**
062 * A pointer to the next factor.
063 */
064 protected Factor next;
065
066 /**
067 * This factor's serial number, which indicates the order of this factor inside the
068 * linked list of factors of a text.
069 */
070 protected int serial_number;
071
072 /**
073 * The number of characters of the text represented by this factor.
074 */
075 protected int length;
076
077 /**
078 * The new character of this factor.
079 */
080 protected char new_char;
081
082 /**
083 * Creates a new empty <CODE>Factor</CODE>. It has no ancestor and no character (both
084 * are set to <CODE>null</CODE>). Its serial number is set to zero as well as its
085 * length.
086 *
087 * <P>This constructor is used to initiate the a linked list of factors of a text. Its
088 * <CODE>next</CODE> pointer is initially <CODE>null</CODE>, but it is typically set
089 * to point to the first factor afterwards (with the <CODE>setNext</CODE> method).
090 *
091 * @see #setNext
092 */
093 public Factor ()
094 {
095 this.ancestor = null;
096 this.next = null;
097 this.serial_number = 0;
098 this.length = 0;
099 this.new_char = 0;
100 }
101
102 /**
103 * Creates a new <CODE>Factor</CODE> instance with the specified serial number and
104 * new character, and pointing to the given ancestor. Its length is set to its
105 * ancestor's length plus 1.
106 *
107 * <P>Its <CODE>next</CODE> pointer is initially <CODE>null</CODE>, but it is
108 * typically set to point to the next factor afterwards (with the <CODE>setNext</CODE>
109 * method).
110 *
111 * @param ancestor this factor's ancestor
112 * @param serial_number this factor's serial number
113 * @param new_char this factor's new character
114 * @see #setNext
115 */
116 public Factor (Factor ancestor, int serial_number, char new_char)
117 {
118 this.ancestor = ancestor;
119 this.serial_number = serial_number;
120 this.new_char = new_char;
121 if (ancestor != null)
122 this.length = ancestor.length() + 1;
123 else
124 throw new IllegalArgumentException ("Ancestor factor cannot be null.");
125 }
126
127 /**
128 * Sets this factor's <CODE>next</CODE> pointer to point to the specified factor.
129 * Although the next factor has typically a serial number equal to this factor's
130 * serial number plus 1, no attempt is made to guarantee this rule. This allows
131 * special constructs or a different order in the factorisation.
132 *
133 * @param next the factor that will be pointed to
134 * @see #getNext
135 */
136 public void setNext (Factor next)
137 {
138 this.next = next;
139 }
140
141 /**
142 * Returns this factor's ancestor factor.
143 *
144 * @return this factor's ancestor factor
145 */
146 public Factor getAncestor ()
147 {
148 return ancestor;
149 }
150
151 /**
152 * This method is a shorthand to return the serial number of this factor's ancestor.
153 * Note that it does not check if this factor has an ancestor or not, therefore, if
154 * it is called on the root factor, a NullPointerException is raised.
155 *
156 * @return the serial number of this factor's ancestor
157 */
158 public int getAncestorSerialNumber ()
159 {
160 return ancestor.getSerialNumber();
161 }
162
163 /**
164 * Returns this factor's next factor.
165 *
166 * @return this factor's next factor
167 * @see #setNext
168 */
169 public Factor getNext ()
170 {
171 return next;
172 }
173
174 /**
175 * Returns this factor's serial number.
176 *
177 * @return this factor's serial number
178 */
179 public int getSerialNumber ()
180 {
181 return serial_number;
182 }
183
184 /**
185 * Returns this factor's length.
186 *
187 * @return this factor's length
188 */
189 public int length ()
190 {
191 return length;
192 }
193
194 /**
195 * Returns this factor's new character.
196 *
197 * @return this factor's new character
198 */
199 public char getNewChar ()
200 {
201 return new_char;
202 }
203
204 /**
205 * Returns a string representation of the text represented by this factor. It inspects
206 * its chain of ancestors up until as far as the root factor, spelling their new
207 * characters out.
208 *
209 * @return a string representation of the text denoted by this factor
210 */
211 public String toString ()
212 {
213 StringBuffer buf = new StringBuffer();
214 Factor ancestor = this;
215
216 while (ancestor.getAncestor() != null)
217 {
218 buf.insert(0, ancestor.getNewChar());
219 ancestor = ancestor.getAncestor();
220 }
221
222 return buf.toString();
223 }
224 }