Cleaned up formatting, changed title from "not totally hacked proxy" to "Crunch 1.0"

pmg23 [2003-11-04 17:08:35]
Cleaned up formatting, changed title from "not totally hacked proxy" to "Crunch 1.0"
Filename
pervasive/crunch/src/psl/memento/pervasive/crunch/ContentExtractor.java
pervasive/crunch/src/psl/memento/pervasive/crunch/HttpStream.java
pervasive/crunch/src/psl/memento/pervasive/crunch/LineInputStream.java
pervasive/crunch/src/psl/memento/pervasive/crunch/Proxy.java
pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilter.java
pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilterSettings.java
pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyListener.java
pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyThread.java
pervasive/crunch/src/psl/memento/pervasive/crunch/ReadTimeoutException.java
pervasive/crunch/src/psl/memento/pervasive/crunch/SettingsEditor.java
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/ContentExtractor.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/ContentExtractor.java
index a7cc538..8f7d8b4 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/ContentExtractor.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/ContentExtractor.java
@@ -1,2850 +1,2853 @@
+package psl.memento.pervasive.crunch;
+
 import java.io.*;
 import java.net.*;
 import java.util.Hashtable;
-import java.util.Vector;
 import java.util.Properties;
-import java.util.Enumeration;
 import java.util.LinkedList;
 import java.util.Iterator;
-import javax.swing.JPanel;
-import javax.swing.JFrame;
 import org.w3c.dom.*;
 import org.openxml.parser.HTMLParser;
 import org.xml.sax.InputSource;
 import org.apache.xml.serialize.*;
-import java.nio.charset.*;
-import java.nio.ByteBuffer;

-/** This class uses a settings file to determine portions of a
- * web site to remove, thus extracting the true content of a
- * site based on the user's preferences.
+/**
+ * This class uses a settings file to determine portions of a web site to
+ * remove, thus extracting the true content of a site based on the user's
+ * preferences.
+ *
  * @author David Neistadt
  */
 public class ContentExtractor implements ProxyFilter {
-
-    //Instance variables
-    private String mSettingsFile;  //the settings file path
-    private Properties mSettings;  //the settings properties file
-    private InputStream mIn;       //the inputstream to filter
-    private Document mTree;        //the DOM tree for HTML
-    private Hashtable mAdServers;  //hashtable of ad servers
-    private LinkedList mLinksSource; //hashtable of all the removed link sources
-    private LinkedList mLinksText;  //hashtable of all the removed link texts
-    private LinkedList mLinksSourceAll; //hashtable of ALL link sources
-    private LinkedList mLinksTextAll; //hashtable of ALL link texts
-    private LinkedList mImagesSource; //hashtable of ALL image sources
-    private ProxyFilterSettings mSettingsGUI;   //the settings JPanel
-    private boolean mCheckChildren; //boolean to see if children nodes should be checked
-    private String textPrintBuffer; //the line to print when text printing
-    private int numberBlankLines;   //the number of consecutive blank lines
-    private int lengthForTableRemover;  //the cumulative length of text in a table
-    private Node mBodyNode;         //the BODY tag node for the link enqueuer
-
-    //Settings variables
-    public static final String IGNORE_TEXT_LINKS = "Ignore Text Links";
-    private final String IGNORE_TEXT_LINKS_DEF = "true";
-    private boolean ignoreTextLinks;
-
-    public static final String IGNORE_IMAGES = "Ignore Images";
-    private final String IGNORE_IMAGES_DEF = "true";
-    private boolean ignoreImages;
-
-    public static final String IGNORE_SCRIPTS = "Ignore Scripts";
-    private final String IGNORE_SCRIPTS_DEF = "true";
-    private boolean ignoreScripts;
-
-    public static final String IGNORE_STYLES = "Ignore Styles";
-    private final String IGNORE_STYLES_DEF = "false";
-    private boolean ignoreStyles;
-
-    public static final String IGNORE_FORMS = "Ignore Forms";
-    private final String IGNORE_FORMS_DEF = "true";
-    private boolean ignoreForms;
-
-    public static final String IGNORE_META = "Ignore Meta Tags";
-    private final String IGNORE_META_DEF = "true";
-    private boolean ignoreMeta;
-
-    //private final String MINIMUM_TEXT_LENGTH = "Minimum Text Length";
-    //private final String MINIMUM_TEXT_LENGTH_DEF = "0";
-    //private int minTextLength;
-
-    //================================================================
-    //All the settings for link lists - or link cells
-    public static final String IGNORE_LINK_CELLS = "Ignore Link Lists";
-    private final String IGNORE_LINK_CELLS_DEF = "true";
-    private boolean ignoreLinkCells;
-
-    //LC stands for Link Cells
-    public static final String LC_IGNORE_IMAGE_LINKS = "Ignore Image Links in Link Lists";
-    private final String LC_IGNORE_IMAGE_LINKS_DEF = "true";
-    private boolean ignoreLCImageLinks;
-
-    public static final String LC_IGNORE_TEXT_LINKS = "Ignore Text Links in Link Lists";
-    private final String LC_IGNORE_TEXT_LINKS_DEF = "true";
-    private boolean ignoreLCTextLinks;
-
-    public static final String LC_ONLY_LINKS_AND_TEXT = "Ignore Only Links and Text in Link Lists";
-    private final String LC_ONLY_LINKS_AND_TEXT_DEF = "true";
-    private boolean ignoreLCOnlyLinksAndText;
-
-    //End of settings for link lists - or link cells
-    //=================================================================
-
-    public static final String IGNORE_IMAGE_LINKS = "Ignore Image Links";
-    private final String IGNORE_IMAGE_LINKS_DEF = "true";
-    private boolean ignoreImageLinks;
-
-    public static final String IGNORE_INPUT_TAGS = "Ignore <INPUT> Tags";
-    private final String IGNORE_INPUT_TAGS_DEF = "true";
-    private boolean ignoreInputTags;
-
-    public static final String IGNORE_BUTTON_TAGS = "Ignore <BUTTON> Tags";
-    private final String IGNORE_BUTTON_TAGS_DEF = "true";
-    private boolean ignoreButtonTags;
-
-    public static final String IGNORE_SELECT_TAGS = "Ignore <SELECT> Tags";
-    private final String IGNORE_SELECT_TAGS_DEF = "true";
-    private boolean ignoreSelectTags;
-
-    public static final String IGNORE_NOSCRIPT_TAGS = "Ignore <NOSCRIPT> Tags";
-    private final String IGNORE_NOSCRIPT_TAGS_DEF = "true";
-    private boolean ignoreNoscriptTags;
-
-    public static final String IGNORE_CELL_WIDTH = "Ignore Table Cell Widths";
-    private final String IGNORE_CELL_WIDTH_DEF = "false";
-    private boolean ignoreCellWidth;
-
-    public static final String IGNORE_ADS = "Ignore All Advertisements";
-    private final String IGNORE_ADS_DEF = "true";
-    private boolean ignoreAds;
-
-    public static final String ONLY_TEXT = "Print Only Text";
-    private final String ONLY_TEXT_DEF = "false";
-    private boolean onlyText;
-
-    public static final String IGNORE_DIV_STYLES = "Ignore Style Attribute in <DIV> Tags";
-    private final String IGNORE_DIV_STYLES_DEF = "false";
-    private boolean ignoreDivStyles;
-
-    public static final String IGNORE_IFRAME_TAGS = "Ignore <IFRAME> Tags";
-    private final String IGNORE_IFRAME_TAGS_DEF = "false";
-    private boolean ignoreIFrameTags;
-
-    public static final String DISPLAY_IMAGE_ALTS = "Display Image ALTs";
-    private final String DISPLAY_IMAGE_ALTS_DEF = "false";
-    private boolean displayImageAlts;
-
-    public static final String DISPLAY_IMAGE_LINK_ALTS = "Display Image Link ALTs";
-    private final String DISPLAY_IMAGE_LINK_ALTS_DEF = "false";
-    private boolean displayImageLinkAlts;
-
-    ////////////////////////// Empty Table Settings ////////////////////////////
-    public static final String REMOVE_EMPTY_TABLES = "Remove Empty Tables";
-    private final String REMOVE_EMPTY_TABLES_DEF = "true";
-    private boolean removeEmptyTables;
-
-    public static final String SUBSTANCE_IMAGE = "<IMG> tags are substance";
-    private final String SUBSTANCE_IMAGE_DEF = "true";
-    private boolean substanceImage;
-
-    public static final String SUBSTANCE_LINKS = "<A> tags are substance";
-    private final String SUBSTANCE_LINKS_DEF = "true";
-    private boolean substanceLinks;
-
-    public static final String SUBSTANCE_IFRAME = "<IFRAME> tags are substance";
-    private final String SUBSTANCE_IFRAME_DEF = "true";
-    private boolean substanceIFrame;
-
-    public static final String SUBSTANCE_INPUT = "<INPUT> tags are substance";
-    private final String SUBSTANCE_INPUT_DEF = "true";
-    private boolean substanceInput;
-
-    public static final String SUBSTANCE_BUTTON = "<BUTTON> tags are substance";
-    private final String SUBSTANCE_BUTTON_DEF = "true";
-    private boolean substanceButton;
-
-    public static final String SUBSTANCE_TEXTAREA = "<TEXTAREA> tags are substance";
-    private final String SUBSTANCE_TEXTAREA_DEF = "true";
-    private boolean substanceTextarea;
-
-    public static final String SUBSTANCE_SELECT = "<SELECT> tags are substance";
-    private final String SUBSTANCE_SELECT_DEF = "true";
-    private boolean substanceSelect;
-
-    public static final String SUBSTANCE_FORM = "<FORM> tags are substance";
-    private final String SUBSTANCE_FORM_DEF = "false";
-    private boolean substanceForm;
-
-    public static final String SUBSTANCE_MIN_TEXT_LENGTH = "Minimum text length as substance";
-    private final String SUBSTANCE_MIN_TEXT_LENGTH_DEF = "1";
-    private int substanceMinTextLength;
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    public static final String LIMIT_LINEBREAKS = "Limit Number of Line Breaks";
-    private final String LIMIT_LINEBREAKS_DEF = "true";
-    private boolean limitLinebreaks;
-
-    public static final String MAX_LINEBREAKS = "Maximum Number of Line Breaks";
-    private final String MAX_LINEBREAKS_DEF = "2";
-    private int maxLinebreaks;
-
-    public static final String ADD_LINKS_TO_BOTTOM = "Add removed links to bottom of the page";
-    private final String ADD_LINKS_TO_BOTTOM_DEF = "false";
-    private boolean addLinksToBottom;
-
-    public static final String IGNORE_EMBED_TAGS = "Ignore <EMBED> tags";
-    private final String IGNORE_EMBED_TAGS_DEF = "false";
-    private boolean ignoreEmbedTags;
-
-    /*
-     * Link/Text ratio is determined by the amount of text words to single links
-     * A word is considered 4 letters long.
-     */
-    public final static String LINK_TEXT_REMOVAL_RATIO = "Link/Text Removal Ratio";
-    private final String LINK_TEXT_REMOVAL_RATIO_DEF = ".25";
-    private double linkTextRatio;
-
-    //Normal final variables not associated with settings
-    public static final int ALL = 0;
-    public static final int TEXT = 1;
-    public static final int IMAGE = 2;
-    public static final int LETTERS_PER_WORD = 5;
-    public static final String SETTINGS_FILE_DEF = "settings.txt";
-    public static final String AD_FILE = "serverlist.txt";
-    public static final String CONTENT_TEXT = "text/plain";
-    public static final String CONTENT_HTML = "text/html";
-
-    /**
-     * Creates a new instance without any input stream and the
-     * default settings file.
-     */
-    public ContentExtractor() {
-        this(ContentExtractor.SETTINGS_FILE_DEF, null);
-    }
-
-    /**
-     * Creates a new instance without any input stream and with a settings file
-     * @param iSettings the settings file path
-     */
-    public ContentExtractor(String iSettings) {
-        this(iSettings, null);
-    }
-
-    /**
-     * Creates a new instance of ContentExtractor with the default settings file
-     * @param iIn the input stream of the HTML file
-     */
-    public ContentExtractor(InputStream iIn) {
-        this(ContentExtractor.SETTINGS_FILE_DEF, iIn);
-    }
-
-    /** Creates a new instance of ContentExtractor
-     * @param iSettings the name of the settings file
-     * @param iIn the input stream of the HTML file
-     */
-    public ContentExtractor(String iSettings, InputStream iIn) {
-        mSettingsFile = iSettings;
-        mSettings = new Properties();
-        mIn = iIn;
-
-        //Load settings and ad server lists
-        loadSettingsProperties();
-        loadAdsServerList();
-        loadSettings();
-
-        mSettingsGUI = new SettingsEditor(this);
-        textPrintBuffer = "";
-        numberBlankLines = 0;
-        mLinksSource = new LinkedList();
-        mLinksText = new LinkedList();
-        mLinksSourceAll = new LinkedList();
-        mLinksTextAll = new LinkedList();
-        mImagesSource = new LinkedList();
-    }
-
-    /**
-     * Loads the settings into the property file
-     */
-    private void loadSettingsProperties() {
-        try {
-            mSettings.load(new FileInputStream(mSettingsFile));
-        }
-        catch (FileNotFoundException e) {
-            //Don't load the settings if the file doesn't exist
-        }
-
-        catch (IOException e) {
-            e.printStackTrace();
-        }
-    }
-
-    /**
-     * Loads the ad file into a hashtable
-     */
-    public void loadAdsServerList() {
-        mAdServers = new Hashtable();
-
-        try {
-            FileReader fr = new FileReader(new File(AD_FILE));
-            BufferedReader in = new BufferedReader(fr);
-            String line = in.readLine();
-
-            while (line != null) {
-                mAdServers.put(line, line);
-                line = in.readLine();
-            }//while
-        }
-        catch (FileNotFoundException e) {
-            //if the ad file is not there, don't do anything, just print
-            //that the file isn't there
-            System.out.println("WARNING: Server list for ad remover not found");
-        }
-        catch (IOException e) {
-            e.printStackTrace();
-        }
-    }//loadAdsServerList
-
-    /**
-     * Returns a setting based on the final variables
-     * @param iSetting the name of the setting based on the final variables
-     * @return the setting as a string. For boolean values, "true" and "false"
-     * will be returned. Null will be returned if the setting doesn't exist
-     */
-    public String getSetting(String iSetting) {
-        if (iSetting.equalsIgnoreCase(IGNORE_ADS))
-            return Boolean.toString(ignoreAds);
-        else if (iSetting.equalsIgnoreCase(IGNORE_BUTTON_TAGS))
-            return Boolean.toString(ignoreButtonTags);
-        else if (iSetting.equalsIgnoreCase(IGNORE_CELL_WIDTH))
-            return Boolean.toString(ignoreCellWidth);
-        else if (iSetting.equalsIgnoreCase(IGNORE_DIV_STYLES))
-            return Boolean.toString(ignoreDivStyles);
-        else if (iSetting.equalsIgnoreCase(IGNORE_FORMS))
-            return Boolean.toString(ignoreForms);
-        else if (iSetting.equalsIgnoreCase(IGNORE_IFRAME_TAGS))
-            return Boolean.toString(ignoreIFrameTags);
-        else if (iSetting.equalsIgnoreCase(IGNORE_IMAGE_LINKS))
-            return Boolean.toString(ignoreImageLinks);
-        else if (iSetting.equalsIgnoreCase(IGNORE_IMAGES))
-            return Boolean.toString(ignoreImages);
-        else if (iSetting.equalsIgnoreCase(IGNORE_INPUT_TAGS))
-            return Boolean.toString(ignoreInputTags);
-        else if (iSetting.equalsIgnoreCase(IGNORE_LINK_CELLS))
-            return Boolean.toString(ignoreLinkCells);
-        else if (iSetting.equalsIgnoreCase(IGNORE_META))
-            return Boolean.toString(ignoreMeta);
-        else if (iSetting.equalsIgnoreCase(IGNORE_NOSCRIPT_TAGS))
-            return Boolean.toString(ignoreNoscriptTags);
-        else if (iSetting.equalsIgnoreCase(IGNORE_SCRIPTS))
-            return Boolean.toString(ignoreScripts);
-        else if (iSetting.equalsIgnoreCase(IGNORE_SELECT_TAGS))
-            return Boolean.toString(ignoreSelectTags);
-        else if (iSetting.equalsIgnoreCase(IGNORE_STYLES))
-            return Boolean.toString(ignoreStyles);
-        else if (iSetting.equalsIgnoreCase(IGNORE_TEXT_LINKS))
-            return Boolean.toString(ignoreTextLinks);
-        else if (iSetting.equalsIgnoreCase(LC_IGNORE_IMAGE_LINKS))
-            return Boolean.toString(ignoreLCImageLinks);
-        else if (iSetting.equalsIgnoreCase(LC_IGNORE_TEXT_LINKS))
-            return Boolean.toString(ignoreLCTextLinks);
-        else if (iSetting.equalsIgnoreCase(LINK_TEXT_REMOVAL_RATIO))
-            return Double.toString(linkTextRatio);
-        else if (iSetting.equalsIgnoreCase(ONLY_TEXT))
-            return Boolean.toString(onlyText);
-        else if (iSetting.equalsIgnoreCase(LC_ONLY_LINKS_AND_TEXT))
-            return Boolean.toString(ignoreLCOnlyLinksAndText);
-        else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_ALTS))
-            return Boolean.toString(displayImageAlts);
-        else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_LINK_ALTS))
-            return Boolean.toString(displayImageLinkAlts);
-        else if (iSetting.equalsIgnoreCase(REMOVE_EMPTY_TABLES))
-            return Boolean.toString(removeEmptyTables);
-        else if (iSetting.equalsIgnoreCase(LIMIT_LINEBREAKS))
-            return Boolean.toString(limitLinebreaks);
-        else if (iSetting.equalsIgnoreCase(MAX_LINEBREAKS))
-            return Integer.toString(maxLinebreaks);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_BUTTON))
-            return Boolean.toString(substanceButton);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_FORM))
-            return Boolean.toString(substanceForm);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_IFRAME))
-            return Boolean.toString(substanceIFrame);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_IMAGE))
-            return Boolean.toString(substanceImage);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_INPUT))
-            return Boolean.toString(substanceInput);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_LINKS))
-            return Boolean.toString(substanceLinks);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_MIN_TEXT_LENGTH))
-            return Integer.toString(substanceMinTextLength);
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_SELECT))
-
-            return Boolean.toString(substanceSelect);
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_TEXTAREA))
-
-            return Boolean.toString(substanceTextarea);
-
-        else if (iSetting.equalsIgnoreCase(ADD_LINKS_TO_BOTTOM))
-
-            return Boolean.toString(addLinksToBottom);
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_EMBED_TAGS))
-
-            return Boolean.toString(ignoreEmbedTags);
-
-
-
-        return null;
-
-    }//getSettings
-
-
-
-    /**
-     *
-     * Sets a setting based on the final variables
-     *
-     * @param iSetting the name of the setting based on the final variables
-     *
-     * @param iValue the desired value of the setting. For boolean values,
-     *
-     * "true" and "false" should be used
-     *
-     */
-
-    public void changeSetting(String iSetting, String iValue) {
-
-        if (iSetting.equalsIgnoreCase(IGNORE_ADS))
-
-            ignoreAds = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_BUTTON_TAGS))
-
-            ignoreButtonTags = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_CELL_WIDTH))
-
-            ignoreCellWidth = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_DIV_STYLES))
-
-            ignoreDivStyles = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_FORMS))
-
-            ignoreForms = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_IFRAME_TAGS))
-
-            ignoreIFrameTags = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_IMAGE_LINKS))
-
-            ignoreImageLinks = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_IMAGES))
-
-            ignoreImages = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_INPUT_TAGS))
-
-            ignoreInputTags = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_LINK_CELLS))
-
-            ignoreLinkCells = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_META))
-
-            ignoreMeta = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_NOSCRIPT_TAGS))
-
-            ignoreNoscriptTags = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_SCRIPTS))
-
-            ignoreScripts = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_SELECT_TAGS))
-
-            ignoreSelectTags = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_STYLES))
-
-            ignoreStyles = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_TEXT_LINKS))
-
-            ignoreTextLinks = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(LC_IGNORE_IMAGE_LINKS))
-
-            ignoreLCImageLinks = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(LC_IGNORE_TEXT_LINKS))
-
-            ignoreLCTextLinks = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(LINK_TEXT_REMOVAL_RATIO))
-
-            linkTextRatio = Double.parseDouble(iValue);
-
-        else if (iSetting.equalsIgnoreCase(ONLY_TEXT))
-
-            onlyText = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(LC_ONLY_LINKS_AND_TEXT))
-
-            ignoreLCOnlyLinksAndText = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_ALTS))
-
-            displayImageAlts = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_LINK_ALTS))
-
-            displayImageLinkAlts = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(REMOVE_EMPTY_TABLES))
-
-            removeEmptyTables = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(LIMIT_LINEBREAKS))
-
-            limitLinebreaks = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(MAX_LINEBREAKS))
-
-            maxLinebreaks = Integer.parseInt(iValue);
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_BUTTON))
-
-            substanceButton = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_FORM))
-
-            substanceForm = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_IFRAME))
-
-            substanceIFrame = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_IMAGE))
-
-            substanceImage = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_INPUT))
-
-            substanceInput = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_LINKS))
-
-            substanceLinks = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_MIN_TEXT_LENGTH))
-
-            substanceMinTextLength = Integer.parseInt(iValue);
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_SELECT))
-
-            substanceSelect = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(SUBSTANCE_TEXTAREA))
-
-            substanceTextarea = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(ADD_LINKS_TO_BOTTOM))
-
-            addLinksToBottom = iValue.equals("true");
-
-        else if (iSetting.equalsIgnoreCase(IGNORE_EMBED_TAGS))
-
-            ignoreEmbedTags = iValue.equals("true");
-
-    }//changeSetting
-
-
-
-    /**
-     *
-     * Loads the settings file into the boolean values
-     *
-     */
-
-    public void loadSettings() {
-
-        ignoreTextLinks = (mSettings.getProperty(IGNORE_TEXT_LINKS, IGNORE_TEXT_LINKS_DEF)).equals("true");
-
-        ignoreImageLinks = (mSettings.getProperty(IGNORE_IMAGE_LINKS, IGNORE_IMAGE_LINKS_DEF)).equals("true");
-
-        ignoreImages = (mSettings.getProperty(IGNORE_IMAGES, IGNORE_IMAGES_DEF)).equals("true");
-
-        ignoreScripts = (mSettings.getProperty(IGNORE_SCRIPTS, IGNORE_SCRIPTS_DEF)).equals("true");
-
-        ignoreStyles = (mSettings.getProperty(IGNORE_STYLES, IGNORE_SCRIPTS_DEF)).equals("true");
-
-        ignoreForms = (mSettings.getProperty(IGNORE_FORMS, IGNORE_FORMS_DEF)).equals("true");
-
-        ignoreMeta = (mSettings.getProperty(IGNORE_META, IGNORE_META_DEF)).equals("true");
-
-        ignoreLinkCells = (mSettings.getProperty(IGNORE_LINK_CELLS, IGNORE_LINK_CELLS_DEF)).equals("true");
-
-        ignoreLCImageLinks = (mSettings.getProperty(LC_IGNORE_IMAGE_LINKS, LC_IGNORE_IMAGE_LINKS_DEF)).equals("true");
-
-        ignoreLCTextLinks = (mSettings.getProperty(LC_IGNORE_TEXT_LINKS, LC_IGNORE_TEXT_LINKS_DEF)).equals("true");
-
-        linkTextRatio = Double.parseDouble(mSettings.getProperty(LINK_TEXT_REMOVAL_RATIO, LINK_TEXT_REMOVAL_RATIO_DEF));
-
-        ignoreButtonTags = (mSettings.getProperty(IGNORE_BUTTON_TAGS, IGNORE_BUTTON_TAGS_DEF)).equals("true");
-
-        ignoreInputTags = (mSettings.getProperty(IGNORE_INPUT_TAGS, IGNORE_INPUT_TAGS_DEF)).equals("true");
-
-        ignoreSelectTags = (mSettings.getProperty(IGNORE_SELECT_TAGS, IGNORE_SELECT_TAGS_DEF)).equals("true");
-
-        ignoreNoscriptTags = (mSettings.getProperty(IGNORE_NOSCRIPT_TAGS, IGNORE_NOSCRIPT_TAGS_DEF)).equals("true");
-
-        ignoreCellWidth = (mSettings.getProperty(IGNORE_CELL_WIDTH, IGNORE_CELL_WIDTH_DEF)).equals("true");
-
-        ignoreAds = (mSettings.getProperty(IGNORE_ADS, IGNORE_ADS_DEF)).equals("true");
-
-        onlyText = (mSettings.getProperty(ONLY_TEXT, ONLY_TEXT_DEF)).equals("true");
-
-        ignoreIFrameTags = (mSettings.getProperty(IGNORE_IFRAME_TAGS, IGNORE_IFRAME_TAGS_DEF)).equals("true");
-
-        ignoreDivStyles = (mSettings.getProperty(IGNORE_DIV_STYLES, IGNORE_DIV_STYLES_DEF)).equals("true");
-
-        ignoreLCOnlyLinksAndText = (mSettings.getProperty(LC_ONLY_LINKS_AND_TEXT, LC_ONLY_LINKS_AND_TEXT_DEF)).equals("true");
-
-        displayImageAlts = (mSettings.getProperty(DISPLAY_IMAGE_ALTS, DISPLAY_IMAGE_ALTS_DEF)).equals("true");
-
-        displayImageLinkAlts = (mSettings.getProperty(DISPLAY_IMAGE_LINK_ALTS, DISPLAY_IMAGE_LINK_ALTS_DEF)).equals("true");
-
-        removeEmptyTables = (mSettings.getProperty(REMOVE_EMPTY_TABLES, REMOVE_EMPTY_TABLES_DEF)).equals("true");
-
-        limitLinebreaks = (mSettings.getProperty(LIMIT_LINEBREAKS, LIMIT_LINEBREAKS_DEF)).equals("true");
-
-        maxLinebreaks = Integer.parseInt(mSettings.getProperty(MAX_LINEBREAKS, MAX_LINEBREAKS_DEF));
-
-        substanceButton = (mSettings.getProperty(SUBSTANCE_BUTTON, SUBSTANCE_BUTTON_DEF)).equals("true");
-
-        substanceForm = (mSettings.getProperty(SUBSTANCE_FORM, SUBSTANCE_FORM_DEF)).equals("true");
-
-        substanceIFrame = (mSettings.getProperty(SUBSTANCE_IFRAME, SUBSTANCE_IFRAME_DEF)).equals("true");
-
-        substanceImage = (mSettings.getProperty(SUBSTANCE_IMAGE, SUBSTANCE_IMAGE_DEF)).equals("true");
-
-        substanceInput = (mSettings.getProperty(SUBSTANCE_INPUT, SUBSTANCE_INPUT_DEF)).equals("true");
-
-        substanceLinks = (mSettings.getProperty(SUBSTANCE_LINKS, SUBSTANCE_LINKS_DEF)).equals("true");
-
-        substanceMinTextLength = Integer.parseInt(mSettings.getProperty(SUBSTANCE_MIN_TEXT_LENGTH, SUBSTANCE_MIN_TEXT_LENGTH_DEF));
-
-        substanceSelect = (mSettings.getProperty(SUBSTANCE_SELECT, SUBSTANCE_SELECT_DEF)).equals("true");
-
-        substanceTextarea = (mSettings.getProperty(SUBSTANCE_TEXTAREA, SUBSTANCE_TEXTAREA_DEF)).equals("true");
-
-        addLinksToBottom = (mSettings.getProperty(ADD_LINKS_TO_BOTTOM, ADD_LINKS_TO_BOTTOM_DEF)).equals("true");
-
-        ignoreEmbedTags = (mSettings.getProperty(IGNORE_EMBED_TAGS, IGNORE_EMBED_TAGS_DEF)).equals("true");
-
-    }//loadSettings
-
-
-
-    private void saveProperties() {
-
-        mSettings.setProperty(IGNORE_TEXT_LINKS, Boolean.toString(ignoreTextLinks));
-
-        mSettings.setProperty(IGNORE_IMAGE_LINKS, Boolean.toString(ignoreImageLinks));
-
-        mSettings.setProperty(IGNORE_IMAGES, Boolean.toString(ignoreImages));
-
-        mSettings.setProperty(IGNORE_SCRIPTS, Boolean.toString(ignoreScripts));
-
-        mSettings.setProperty(IGNORE_STYLES, Boolean.toString(ignoreStyles));
-
-        mSettings.setProperty(IGNORE_FORMS, Boolean.toString(ignoreForms));
-
-        mSettings.setProperty(IGNORE_META, Boolean.toString(ignoreMeta));
-
-        mSettings.setProperty(IGNORE_LINK_CELLS, Boolean.toString(ignoreLinkCells));
-
-        mSettings.setProperty(LC_IGNORE_IMAGE_LINKS, Boolean.toString(ignoreLCImageLinks));
-
-        mSettings.setProperty(LC_IGNORE_TEXT_LINKS, Boolean.toString(ignoreLCTextLinks));
-
-        mSettings.setProperty(LINK_TEXT_REMOVAL_RATIO, Double.toString(linkTextRatio));
-
-        mSettings.setProperty(IGNORE_BUTTON_TAGS, Boolean.toString(ignoreButtonTags));
-
-        mSettings.setProperty(IGNORE_INPUT_TAGS, Boolean.toString(ignoreInputTags));
-
-        mSettings.setProperty(IGNORE_SELECT_TAGS, Boolean.toString(ignoreSelectTags));
-
-        mSettings.setProperty(IGNORE_NOSCRIPT_TAGS, Boolean.toString(ignoreNoscriptTags));
-
-        mSettings.setProperty(IGNORE_CELL_WIDTH, Boolean.toString(ignoreCellWidth));
-
-        mSettings.setProperty(IGNORE_ADS, Boolean.toString(ignoreAds));
-
-        mSettings.setProperty(ONLY_TEXT, Boolean.toString(onlyText));
-
-        mSettings.setProperty(IGNORE_IFRAME_TAGS, Boolean.toString(ignoreIFrameTags));
-
-        mSettings.setProperty(IGNORE_DIV_STYLES, Boolean.toString(ignoreDivStyles));
-
-        mSettings.setProperty(LC_ONLY_LINKS_AND_TEXT, Boolean.toString(ignoreLCOnlyLinksAndText));
-
-        mSettings.setProperty(DISPLAY_IMAGE_ALTS, Boolean.toString(displayImageAlts));
-
-        mSettings.setProperty(DISPLAY_IMAGE_LINK_ALTS, Boolean.toString(displayImageLinkAlts));
-
-        mSettings.setProperty(REMOVE_EMPTY_TABLES, Boolean.toString(removeEmptyTables));
-
-        mSettings.setProperty(LIMIT_LINEBREAKS, Boolean.toString(limitLinebreaks));
-
-        mSettings.setProperty(MAX_LINEBREAKS, Integer.toString(maxLinebreaks));
-
-        mSettings.setProperty(SUBSTANCE_BUTTON, Boolean.toString(substanceButton));
-
-        mSettings.setProperty(SUBSTANCE_FORM, Boolean.toString(substanceForm));
-
-        mSettings.setProperty(SUBSTANCE_IFRAME, Boolean.toString(substanceIFrame));
-
-        mSettings.setProperty(SUBSTANCE_IMAGE, Boolean.toString(substanceImage));
-
-        mSettings.setProperty(SUBSTANCE_INPUT, Boolean.toString(substanceInput));
-
-        mSettings.setProperty(SUBSTANCE_LINKS, Boolean.toString(substanceLinks));
-
-        mSettings.setProperty(SUBSTANCE_MIN_TEXT_LENGTH, Integer.toString(substanceMinTextLength));
-
-        mSettings.setProperty(SUBSTANCE_SELECT, Boolean.toString(substanceSelect));
-
-        mSettings.setProperty(SUBSTANCE_TEXTAREA, Boolean.toString(substanceTextarea));
-
-        mSettings.setProperty(ADD_LINKS_TO_BOTTOM, Boolean.toString(addLinksToBottom));
-
-        mSettings.setProperty(IGNORE_EMBED_TAGS, Boolean.toString(ignoreEmbedTags));
-
-    }
-
-
-
-    /**
-     *
-     * Save the settings file
-     *
-     */
-
-    public void saveSettings() {
-
-        saveProperties();
-
-
-
-        try {
-
-            mSettings.store(new FileOutputStream(new File(mSettingsFile)), "Content Extractor Settings File");
-
-        }
-
-        catch (Exception e) {
-
-            e.printStackTrace();
-
-        }
-
-    }
-
-
-
-    /**
-     * Extracts the content of the html page based on the settings.
-     *
-     * The stream mIn is parsed into a DOM document that is stored in mTree,
-     * the document is run through the filters, and - when addLinksToBottom
-     * is set - the enqueued links are appended to the page. Any exception is
-     * printed to standard error and not rethrown.
-     */
-    public void extractContent() {
-        HTMLParser htmlParser = new HTMLParser();
-
-        try {
-            //The page is decoded using the ISO-8859-1 character set
-            InputSource page = new InputSource(new InputStreamReader(mIn, "ISO-8859-1"));
-            htmlParser.parse(page);
-            mTree = htmlParser.getDocument();
-
-            extract(mTree);
-
-            //Appends the links to the bottom of the page
-            if (addLinksToBottom) {
-                addEnqueuedLinks();
-            }
-        } catch (Exception failure) {
-            failure.printStackTrace();
-        }
-    }
-
-    /**
-     * Runs extractContent() and afterwards writes the ONLY_TEXT setting back
-     * to the value it had when this method was entered.
-     *
-     * NOTE(review): nothing in this method switches ONLY_TEXT on before the
-     * extraction runs, so the "text only" behaviour implied by the name is
-     * not visible here - confirm whether callers are expected to set it.
-     */
-    public void extractContentAsText() {
-        final String savedOnlyText = getSetting(ONLY_TEXT);
-        extractContent();
-        changeSetting(ONLY_TEXT, savedOnlyText);
-    }
-
-
-    /**
-     * Runs every child of the given node through filterNode(), which in
-     * turn recurses into the children that survive filtering.
-     *
-     * @param iNode the node to start checking
-     */
-    private void extract(Node iNode) {
-        //Walk the sibling chain instead of indexing a NodeList: a DOM
-        //NodeList is live, so once a filter removes a child the indices
-        //shift, a sibling is skipped and item(i) eventually returns null.
-        Node next = iNode.getFirstChild();
-        while (next != null) {
-            Node current = next;
-            //Remember the successor before the filter can detach current
-            next = current.getNextSibling();
-            filterNode(current);
-        }
-    }
-
-
-
-    /**
-     * Examines a node and determines if it should be included in the
-     * extracted DOM tree, then filters its children unless one of the
-     * filters cleared mCheckChildren.
-     *
-     * @param iNode the node to filter
-     */
-    private void filterNode(Node iNode) {
-        //Every node starts out with its children scheduled for filtering;
-        //passThroughFilters() clears the flag when that must not happen
-        mCheckChildren = true;
-        passThroughFilters(iNode);
-
-        if (!mCheckChildren) {
-            return;
-        }
-        filterChildren(iNode);
-    }//filterNode
-
-
-
-    /**
-     * Passes a node through the set of filters.
-     *
-     * Element nodes go through three stages: links and images are recorded,
-     * presentation attributes are stripped (width on TD/TABLE, style on
-     * DIV), and finally the first matching "ignore" rule removes or replaces
-     * the node. Rules that dispose of a whole subtree set mCheckChildren to
-     * false. Nodes of any other type are left untouched.
-     *
-     * @param iNode the node to filter
-     */
-    private void passThroughFilters(Node iNode) {
-        //There are currently no filters for text (or other non-element) nodes
-        if (iNode.getNodeType() != Node.ELEMENT_NODE) {
-            return;
-        }
-
-        String name = iNode.getNodeName();
-        Node parent = iNode.getParentNode();
-
-        //================================================================
-        // Set of conditions that just check the nodes without editing or
-        // deleting them
-        //================================================================
-
-        //Any type of link is encountered
-        if (isLink(iNode)) {
-            recordLink(iNode);
-        }
-        //Braces matter here: an empty statement after the condition would
-        //make the call unconditional
-        if (isImage(iNode)) {
-            recordImage(iNode);
-        }
-
-        //================================================================
-        // Set of conditions that edit the nodes but don't delete them
-        //================================================================
-
-        //<TD|TABLE width=*> removes widths
-        if ((name.equalsIgnoreCase("TD") || name.equalsIgnoreCase("TABLE")) && ignoreCellWidth) {
-            if (hasAttribute(iNode, "width")) {
-                removeAttribute(iNode, "width");
-            }
-        }
-
-        //<DIV style=*> removes style
-        if (name.equalsIgnoreCase("DIV") && ignoreDivStyles) {
-            if (hasAttribute(iNode, "style")) {
-                removeAttribute(iNode, "style");
-            }
-        }
-
-        //================================================================
-        // Set of conditionals determining what to ignore and not to ignore
-        // (Conditions that DELETE nodes from the DOM tree)
-        //================================================================
-
-        if (isAdLink(iNode) && ignoreAds) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<TD> with Link/Text Ratio higher than threshold
-        else if (name.equalsIgnoreCase("TD") && ignoreLinkCells) {
-            testRemoveCell(iNode);
-        }
-        //<A HREF> with no Images
-        else if (isTextLink(iNode) && ignoreTextLinks) {
-            parent.removeChild(iNode);
-            if (addLinksToBottom) {
-                enqueueLink(iNode);
-            }
-            mCheckChildren = false;
-        }
-        //<A HREF> with Images
-        else if (isImageLink(iNode) && ignoreImageLinks) {
-            if (displayImageLinkAlts) {
-                //Only the image inside the link is replaced here; when iNode
-                //is the link itself nothing happens in this branch
-                boolean image = isImage(iNode);
-
-                Node alt = null;
-                if (image) {
-                    alt = createImageLinkAltNode(iNode);
-                }
-                if (alt != null) {
-                    //The replacement goes in front of the enclosing link
-                    parent.getParentNode().insertBefore(alt, parent);
-                }
-
-                //Remove the image and the link
-                if (image) {
-                    parent.removeChild(iNode);
-
-                    //Only remove the link if there are no more children
-                    //to prevent NullPointerExceptions
-                    if (!parent.hasChildNodes()) {
-                        parent.getParentNode().removeChild(parent);
-                    }
-                }
-            } else {
-                parent.removeChild(iNode);
-            }
-        }
-        //<IMG*>
-        else if (name.equalsIgnoreCase("IMG") && ignoreImages && !isImageLink(iNode)) {
-            if (displayImageAlts) {
-                Node alt = createAltNode(iNode);
-                if (alt != null) {
-                    parent.insertBefore(alt, iNode);
-                }
-            }
-            parent.removeChild(iNode);
-        }
-        //<SCRIPT>
-        else if (name.equalsIgnoreCase("SCRIPT") && ignoreScripts) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<NOSCRIPT>
-        else if (name.equalsIgnoreCase("NOSCRIPT") && ignoreNoscriptTags) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<NOSCRIPT> removal and save children
-        else if (name.equalsIgnoreCase("NOSCRIPT") && ignoreScripts) {
-            Node current = iNode.getFirstChild();
-            while (current != null) {
-                Node next = current.getNextSibling();
-                //reinsert child before NOSCRIPT node
-                parent.insertBefore(current, iNode);
-                current = next;
-            }
-            parent.removeChild(iNode);
-        }
-        //<STYLE>
-        else if (name.equalsIgnoreCase("STYLE") && ignoreStyles) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<META>
-        else if (name.equalsIgnoreCase("META") && ignoreMeta) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<FORM>
-        else if (name.equalsIgnoreCase("FORM") && ignoreForms) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<INPUT>
-        else if (name.equalsIgnoreCase("INPUT") && ignoreInputTags) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<BUTTON>
-        else if (name.equalsIgnoreCase("BUTTON") && ignoreButtonTags) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<SELECT>
-        else if (name.equalsIgnoreCase("SELECT") && ignoreSelectTags) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<IFRAME>
-        else if (name.equalsIgnoreCase("IFRAME") && ignoreIFrameTags) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<TABLE>
-        else if (name.equalsIgnoreCase("TABLE") && removeEmptyTables) {
-            //removeEmptyTables() filters the table's children itself
-            removeEmptyTables(iNode);
-            mCheckChildren = false;
-        }
-        //<EMBED>
-        else if (name.equalsIgnoreCase("EMBED") && ignoreEmbedTags) {
-            parent.removeChild(iNode);
-            mCheckChildren = false;
-        }
-        //<BODY> is remembered so links can later be appended to it
-        else if (name.equalsIgnoreCase("BODY")) {
-            mBodyNode = iNode;
-        }
-    }
-
-
-
-    /**
-     * Runs filterNode() on each child of the given node.
-     *
-     * @param iNode the node whose children are filtered
-     */
-    private void filterChildren(Node iNode) {
-        for (Node child = iNode.getFirstChild(); child != null; ) {
-            //Fetch the successor first; filtering may detach the child
-            Node following = child.getNextSibling();
-            filterNode(child);
-            child = following;
-        }
-    }//filterChildren
-
-
-
-    /**
-     * Filters the contents of a table and then removes the table from its
-     * parent if processEmptyTable() reports that nothing of substance is
-     * left in it.
-     *
-     * @param iNode the table node to examine
-     */
-    private void removeEmptyTables(Node iNode) {
-        //Filter the children first so that the emptiness test below only
-        //sees the nodes that were not deleted
-        Node pending = iNode.getFirstChild();
-        while (pending != null) {
-            Node child = pending;
-            pending = child.getNextSibling();
-            filterNode(child);
-        }
-
-        //The text length counter is shared across the recursive check, so
-        //it has to start from zero for every table
-        lengthForTableRemover = 0;
-        if (processEmptyTable(iNode)) {
-            iNode.getParentNode().removeChild(iNode);
-        }
-    }//removeEmptyTables
-
-
-
-    /**
-     * Recursively checks a node and its descendants to see if the table is
-     * empty. An element counts as substance when its tag has the matching
-     * substance* setting switched on; text counts once the accumulated
-     * trimmed length reaches substanceMinTextLength.
-     *
-     * @param iNode the node to recursively check.
-     * @return true if the nodes are empty, false if they are not
-     */
-    private boolean processEmptyTable(Node iNode) {
-        int type = iNode.getNodeType();
-
-        if (type == Node.ELEMENT_NODE) {
-            String name = iNode.getNodeName();
-            boolean substantial =
-                    (name.equalsIgnoreCase("IMG") && substanceImage)
-                    || (name.equalsIgnoreCase("A") && substanceLinks)
-                    || (name.equalsIgnoreCase("BUTTON") && substanceButton)
-                    || (name.equalsIgnoreCase("FORM") && substanceForm)
-                    || (name.equalsIgnoreCase("IFRAME") && substanceIFrame)
-                    || (name.equalsIgnoreCase("INPUT") && substanceInput)
-                    || (name.equalsIgnoreCase("SELECT") && substanceSelect)
-                    || (name.equalsIgnoreCase("TEXTAREA") && substanceTextarea);
-            if (substantial) {
-                return false;
-            }
-        } else if (type == Node.TEXT_NODE) {
-            //Text is measured cumulatively over the whole table
-            lengthForTableRemover += iNode.getNodeValue().trim().length();
-            if (lengthForTableRemover >= substanceMinTextLength) {
-                return false;
-            }
-        }
-
-        //Stop at the first descendant that has substance
-        for (Node child = iNode.getFirstChild(); child != null; child = child.getNextSibling()) {
-            if (!processEmptyTable(child)) {
-                return false;
-            }
-        }
-
-        return true;
-    }//processEmptyTable
-
-
-
-    /**
-     * Creates a replacement for an image that sits inside a link. The new
-     * node has the form "[Image | alt]" where "Image" links to the image
-     * source and the alt text links to the target of the original link.
-     * "-Link-" is used when the image has no (or only a blank) alt text.
-     *
-     * @param iNode the <IMG> node that is within the <A> tag
-     * @return the new node, or null if iNode is not an image inside a link
-     * or if the image source or the link target is missing or blank
-     */
-    private Node createImageLinkAltNode(Node iNode) {
-        //Make sure it is an image link and an image
-        if (!isImage(iNode)) {
-            return null;
-        }
-        if (!isImageLink(iNode)) {
-            return null;
-        }
-
-        //Determine if there is an ALT tag. Emptiness is tested by length
-        //because == on strings compares references, not contents.
-        String altTag = "-Link-";
-        Node attr = iNode.getAttributes().getNamedItem("alt");
-        if (attr != null && attr.getNodeValue().trim().length() > 0) {
-            altTag = attr.getNodeValue();
-        }
-
-        //Determine the source of the image
-        Node attrSource = iNode.getAttributes().getNamedItem("src");
-        if (attrSource == null) {
-            return null;
-        }
-        String imageSource = attrSource.getNodeValue();
-        if (imageSource.trim().length() == 0) {
-            return null;
-        }
-
-        //Determine the href of the link. isLink() matches the attribute
-        //name case-insensitively, so a lookup by the exact name "href" can
-        //still come back empty.
-        Node attrHref = iNode.getParentNode().getAttributes().getNamedItem("href");
-        if (attrHref == null) {
-            return null;
-        }
-        String linkHref = attrHref.getNodeValue();
-        if (linkHref == null || linkHref.trim().length() == 0) {
-            return null;
-        }
-
-        //CONSTRUCT REPLACEMENT NODE
-        Element parent = mTree.createElement("B");
-        Element italic = mTree.createElement("I");
-
-        Element imageLink = mTree.createElement("A");
-        imageLink.setAttribute("href", imageSource);
-
-        Element altLink = mTree.createElement("A");
-        altLink.setAttribute("href", linkHref);
-
-        Node openBracket = mTree.createTextNode("[");
-        Node closeBracket = mTree.createTextNode("]");
-        Node separator = mTree.createTextNode(" | ");
-        Node imageLinkText = mTree.createTextNode("Image");
-        Node altLinkText = mTree.createTextNode(altTag);
-
-        //Link together nodes
-        parent.appendChild(openBracket);
-        parent.appendChild(imageLink);
-        imageLink.appendChild(imageLinkText);
-        parent.appendChild(separator);
-        parent.appendChild(italic);
-        italic.appendChild(altLink);
-        altLink.appendChild(altLinkText);
-        parent.appendChild(closeBracket);
-
-        return parent;
-    }//createImageLinkAltNode
-
-
-
-    /**
-     * Creates a new node that links to an image using its ALT text. The
-     * result is an <A> element pointing at the image source whose content
-     * is the alt text in bold, wrapped in square brackets.
-     *
-     * @param iNode the image node
-     * @return the node to add to the DOM tree, or null if the node isn't an
-     * image, has no src attribute, or has no (or only a blank) ALT attribute
-     */
-    private Node createAltNode(Node iNode) {
-        if (!isImage(iNode)) {
-            return null;
-        }
-
-        //Determine if there is an ALT tag. Emptiness is tested by length
-        //because == on strings compares references, not contents.
-        Node attr = iNode.getAttributes().getNamedItem("alt");
-        if (attr == null) {
-            return null;
-        }
-        if (attr.getNodeValue().trim().length() == 0) {
-            return null;
-        }
-
-        //Determine if there is a src
-        Node attrLink = iNode.getAttributes().getNamedItem("src");
-        if (attrLink == null) {
-            return null;
-        }
-
-        //Create new link node pointing at the image
-        Element altNode = mTree.createElement("A");
-        altNode.setAttribute("href", attrLink.getNodeValue());
-
-        //Bold element holding the bracketed alt text. No setNodeValue()
-        //call is made on the link: the DOM specification defines it as
-        //having no effect on element nodes.
-        Node bold = mTree.createElement("B");
-        bold.appendChild(mTree.createTextNode("[" + attr.getNodeValue() + "]"));
-        altNode.appendChild(bold);
-
-        return altNode;
-    }//createAltNode
-
-
-
-    /**
-     * Determines if a node has a link to an ad. The address is taken from
-     * the node's href attribute or, when there is none, from its src
-     * attribute; the host part of that address is looked up in mAdServers.
-     *
-     * @param iNode the node to check for ads
-     * @return true if the node is a link to an ad, or false if it isn't
-     */
-    private boolean isAdLink(Node iNode) {
-        //href takes precedence over src
-        NamedNodeMap attributes = iNode.getAttributes();
-        Node addressAttr = attributes.getNamedItem("href");
-        if (addressAttr == null) {
-            addressAttr = attributes.getNamedItem("src");
-        }
-
-        //Doesn't have the required attributes
-        if (addressAttr == null) {
-            return false;
-        }
-
-        String address = addressAttr.getNodeValue();
-
-        boolean ad = false;
-        try {
-            String host = new URL(address).getHost();
-            ad = mAdServers.get(host) != null;
-        } catch (Exception e) {
-            //Don't do anything because if the URL is malformed, it
-            //probably doesn't point towards an advertisement domain
-        }
-
-        return ad;
-    }//isAdLink
-
-
-
-    /**
-     * Removes an attribute from an Element node if the attribute exists.
-     * Nothing happens when the node has no attribute of that name.
-     *
-     * @param iNode the node
-     * @param iAttr the name of the attribute
-     */
-    private void removeAttribute(Node iNode, String iAttr) {
-        NamedNodeMap attributes = iNode.getAttributes();
-
-        //removeNamedItem() raises a NOT_FOUND_ERR DOMException for a name
-        //that is not in the map, so check for the attribute first
-        if (attributes != null && attributes.getNamedItem(iAttr) != null) {
-            attributes.removeNamedItem(iAttr);
-        }
-    }//removeAttribute
-
-
-
-    /**
-     * Adds an attribute to an Element node. The attribute is created from
-     * the document in mTree and stored in the node's attribute map.
-     *
-     * @param iNode the node
-     * @param iName the name of the attribute
-     * @param iValue the value of the attribute
-     */
-    private void addAttribute(Node iNode, String iName, String iValue) {
-        Attr created = mTree.createAttribute(iName);
-        created.setValue(iValue);
-
-        NamedNodeMap attributes = iNode.getAttributes();
-        attributes.setNamedItem(created);
-    }//addAttribute
-
-
-
-    /**
-     * Checks to see if an attribute exists in an Element node.
-     *
-     * @param iNode the node
-     * @param iAttr the name of the attribute to check for
-     * @return true if the attribute exists, false if it doesn't
-     */
-    private boolean hasAttribute(Node iNode, String iAttr) {
-        return iNode.getAttributes().getNamedItem(iAttr) != null;
-    }//hasAttribute
-
-
-
-    /**
-     * Empties a table cell if its link/text ratio is above linkTextRatio.
-     *
-     * The kind of link that is counted follows the ignoreLCImageLinks and
-     * ignoreLCTextLinks settings (all links when both or neither are set).
-     * A cell without any words is always treated as being over the
-     * threshold. Depending on ignoreLCOnlyLinksAndText either only the
-     * links and text nodes are removed from the cell or all of its
-     * children are.
-     *
-     * @param iNode the table cell node
-     */
-    public void testRemoveCell(Node iNode) {
-        //Ignore if the cell has no children
-        if (!iNode.hasChildNodes()) {
-            return;
-        }
-
-        int type = ALL;
-        if (ignoreLCImageLinks && ignoreLCTextLinks) {
-            type = ALL;
-        } else if (ignoreLCImageLinks) {
-            type = IMAGE;
-        } else if (ignoreLCTextLinks) {
-            type = TEXT;
-        }
-
-        //Count up links and words
-        double links = getNumLinks(iNode, type);
-        double words = getNumWords(iNode);
-
-        //Compute the ratio and check for divide by 0
-        double ratio;
-        if (words == 0) {
-            ratio = linkTextRatio + 1;
-        } else {
-            ratio = links / words;
-        }
-
-        if (ratio > linkTextRatio) {
-            if (!ignoreLCOnlyLinksAndText) {
-                //Don't check the children because they are all removed
-                mCheckChildren = false;
-            }
-
-            //A single pass over the children is enough for either mode. The
-            //successor is fetched before the current child is touched
-            //because a removed node no longer has siblings.
-            Node next = iNode.getFirstChild();
-            while (next != null) {
-                Node current = next;
-                next = current.getNextSibling();
-
-                //Check to see if only text and link nodes should be removed
-                if (ignoreLCOnlyLinksAndText) {
-                    removeLinksAndText(current, type);
-                } else {
-                    removeAll(current);
-                }
-            }
-        }
-    }//testRemoveCell
-
-
-
-    /**
-     * Recursive function that removes a node together with everything
-     * below it. A text link is handed to enqueueLink() instead of being
-     * descended into when addLinksToBottom is set.
-     *
-     * @param iNode the node to remove
-     */
-    private void removeAll(Node iNode) {
-        boolean keepForBottom = isTextLink(iNode) && addLinksToBottom;
-
-        if (keepForBottom) {
-            enqueueLink(iNode);
-        } else {
-            //Take the subtree apart child by child
-            for (Node child = iNode.getFirstChild(); child != null; ) {
-                Node following = child.getNextSibling();
-                removeAll(child);
-                child = following;
-            }
-        }
-
-        iNode.getParentNode().removeChild(iNode);
-    }//removeAll
-
-
-
-    /**
-     * Recursive function that removes links and text nodes. Links and text
-     * nodes are removed according to iType (ALL: every one of them, IMAGE:
-     * only image links, TEXT: everything except image links); any other
-     * node is left in place and searched recursively. A text link is
-     * enqueued for the bottom of the page only when it is actually removed.
-     *
-     * @param iNode the node to edit
-     * @param iType the type of links to remove
-     */
-    private void removeLinksAndText(Node iNode, int iType) {
-        if (isLink(iNode) || iNode.getNodeType() == Node.TEXT_NODE) {
-            boolean remove;
-            if (iType == ALL) {
-                remove = true;
-            } else if (iType == IMAGE) {
-                remove = isImageLink(iNode);
-            } else if (iType == TEXT) {
-                remove = !isImageLink(iNode);
-            } else {
-                remove = false;
-            }
-
-            if (remove) {
-                //The enqueued links are listed as removed links, so a link
-                //that stays in the cell must not be enqueued as well
-                if (isTextLink(iNode) && addLinksToBottom) {
-                    enqueueLink(iNode);
-                }
-                iNode.getParentNode().removeChild(iNode);
-            }
-        } else {
-            Node next = iNode.getFirstChild();
-            while (next != null) {
-                Node current = next;
-                next = current.getNextSibling();
-                removeLinksAndText(current, iType);
-            }
-        }
-    }//removeLinksAndText
-
-
-
-    /**
-     * Determines if a domain is an ad domain by looking it up in mAdServers.
-     *
-     * @param iDomain the domain to check
-     * @return true if the domain is an ad domain, false if it is not.
-     */
-    private boolean isAdDomain(String iDomain) {
-        return mAdServers.get(iDomain) != null;
-    }//isAdDomain
-
-
-
-    /**
-     * Counts the number of links from one node downward, the node itself
-     * included.
-     *
-     * @param iNode the node to start counting from
-     * @param iType the type of links to count (ALL, TEXT or IMAGE).
-     * @return the number of links
-     */
-    private double getNumLinks(Node iNode, int iType) {
-        double links = 0;
-
-        //Links below this node
-        for (Node child = iNode.getFirstChild(); child != null; child = child.getNextSibling()) {
-            links += getNumLinks(child, iType);
-        }
-
-        //The node itself, judged by the requested link type
-        boolean counts;
-        if (iType == ALL) {
-            counts = isLink(iNode);
-        } else if (iType == TEXT) {
-            counts = isTextLink(iNode);
-        } else if (iType == IMAGE) {
-            counts = isImageLink(iNode);
-        } else {
-            counts = false;
-        }
-        if (counts) {
-            links++;
-        }
-
-        return links;
-    }//getNumLinks
-
-
-
-    /**
-     * Checks to see if a node is a link, i.e. an A element that carries an
-     * href attribute. Tag and attribute names are compared without regard
-     * to case.
-     *
-     * @param iNode the node to check
-     * @return true if the node is a link, false if it is not
-     */
-    private boolean isLink(Node iNode) {
-        if (iNode.getNodeType() != Node.ELEMENT_NODE) {
-            return false;
-        }
-        if (!iNode.getNodeName().equalsIgnoreCase("A")) {
-            return false;
-        }
-
-        //An anchor only counts as a link when it has an href
-        NamedNodeMap attributes = iNode.getAttributes();
-        int count = attributes.getLength();
-        for (int index = 0; index < count; index++) {
-            if (attributes.item(index).getNodeName().equalsIgnoreCase("HREF")) {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
-
-
-    /**
-     * Checks to see if a node is a link with an image as the link or, if
-     * the node is an image, whether its parent is a link. Only the direct
-     * children of a link are examined.
-     *
-     * @param iNode the node to check
-     * @return true if the node is a link with an image, false if it is not
-     */
-    private boolean isImageLink(Node iNode) {
-        if (isLink(iNode)) {
-            //A link qualifies when one of its children is an image
-            for (Node child = iNode.getFirstChild(); child != null; child = child.getNextSibling()) {
-                if (isImage(child)) {
-                    return true;
-                }
-            }
-            return false;
-        }
-
-        //If the node is an image, check if its parent is a link
-        if (isImage(iNode)) {
-            return isLink(iNode.getParentNode());
-        }
-
-        return false;
-    }//isImageLink
-
-
-
-    /**
-     * Checks to see if a node is an image, i.e. an element named IMG
-     * (compared without regard to case).
-     *
-     * @param iNode the node to check
-     * @return true if the node is an image, false if it is not
-     */
-    private boolean isImage(Node iNode) {
-        return iNode.getNodeType() == Node.ELEMENT_NODE
-                && iNode.getNodeName().equalsIgnoreCase("IMG");
-    }
-
-
-
-    /**
-     * Determines if a link is a text link, meaning a link that is not an
-     * image link.
-     *
-     * @param iNode the node to analyze
-     * @return true if the node is a text link and false if it is not.
-     */
-    private boolean isTextLink(Node iNode) {
-        if (isImageLink(iNode)) {
-            return false;
-        }
-        return isLink(iNode);
-    }//isTextLink
-
-
-
-    /**
-     * Estimates the number of words from one node downward. The length of
-     * every text node is divided by LETTERS_PER_WORD; text that sits inside
-     * a link is not counted.
-     *
-     * @param iNode the node to start counting from
-     * @return the estimated number of words
-     */
-    private double getNumWords(Node iNode) {
-        double words = 0;
-
-        for (Node child = iNode.getFirstChild(); child != null; child = child.getNextSibling()) {
-            //If it is a link, don't go any deeper into it
-            if (isLink(child)) {
-                continue;
-            }
-            words += getNumWords(child);
-        }
-
-        //Only text nodes contribute words of their own
-        if (iNode.getNodeType() == Node.TEXT_NODE) {
-            String content = iNode.getNodeValue();
-            words += ((double) content.length()) / LETTERS_PER_WORD;
-        }
-
-        return words;
-    }//getNumWords
-
-
-
-    /**
-     * Prepares a text link to be added to the bottom of the page by
-     * appending its target and its text to the mLinksSource and mLinksText
-     * lists. The text is the value of the link's first child; links without
-     * children, without an href value or without such a text value are
-     * skipped.
-     *
-     * @param iLinkNode the link node to add to the bottom of the page
-     */
-    private void enqueueLink(Node iLinkNode) {
-        //Make sure the node is a link
-        if (!isTextLink(iLinkNode)) {
-            return;
-        }
-
-        //A link may be empty (<a href="..."></a>), and isLink() accepts the
-        //attribute name in any case, so neither lookup is guaranteed to
-        //succeed
-        Node href = iLinkNode.getAttributes().getNamedItem("href");
-        Node firstChild = iLinkNode.getFirstChild();
-        if (href == null || firstChild == null) {
-            return;
-        }
-
-        String source = href.getNodeValue();
-        String text = firstChild.getNodeValue();
-
-        if (source != null && text != null) {
-            mLinksSource.add(source);
-            mLinksText.add(text);
-        }
-    }//enqueueLink
-
-    /**
-     * Records the presence of a link in the lists returned by
-     * getLinkSources() and getLinkText(). The recorded text is the value of
-     * the link's first child and is null when there is no such value (for
-     * example for an image link or an empty link).
-     *
-     * @param iLinkNode the link node
-     */
-    private void recordLink(Node iLinkNode) {
-        //Make sure the node is a link
-        if (!isLink(iLinkNode)) {
-            return;
-        }
-
-        //isLink() accepts the attribute name in any case, so the lookup by
-        //the exact name "href" can still come back empty
-        Node href = iLinkNode.getAttributes().getNamedItem("href");
-        if (href == null) {
-            return;
-        }
-        String source = href.getNodeValue();
-        if (source == null) {
-            return;
-        }
-
-        Node firstChild = iLinkNode.getFirstChild();
-        String text = (firstChild == null) ? null : firstChild.getNodeValue();
-
-        //Every link goes into the "all links" lists. mLinksSource and
-        //mLinksText are the queue of removed links that is printed at the
-        //bottom of the page and must only be filled by enqueueLink().
-        if (mLinksSourceAll == null) {
-            mLinksSourceAll = new LinkedList();
-        }
-        if (mLinksTextAll == null) {
-            mLinksTextAll = new LinkedList();
-        }
-        mLinksSourceAll.add(source);
-        mLinksTextAll.add(text);
-    }//recordLink
-
-    /**
-     * Records the presence of an image by appending the value of its src
-     * attribute to mImagesSource. Nodes that are not images and images
-     * without a src value are ignored.
-     *
-     * @param iImageNode the image node
-     */
-    private void recordImage(Node iImageNode) {
-        //Make sure the node is an image
-        if (!isImage(iImageNode)) {
-            return;
-        }
-
-        //An <IMG> without a src attribute must not abort the extraction
-        Node src = iImageNode.getAttributes().getNamedItem("src");
-        if (src == null) {
-            return;
-        }
-
-        String source = src.getNodeValue();
-        if (source != null) {
-            mImagesSource.add(source);
-        }
-    }
-
-    /**
-     * Returns the linked list of image sources, i.e. the list that
-     * recordImage() appends "src" attribute values to. The internal list
-     * itself is returned, not a copy.
-     * @return a linked list containing all the image sources
-     */
-    public LinkedList getImageSources() {
-        return mImagesSource;
-    }
-
-    /**
-     * Returns the linked list held in mLinksSourceAll. The internal list
-     * itself is returned, not a copy.
-     * @return a linked list containing all the link sources
-     */
-    public LinkedList getLinkSources() {
-        return mLinksSourceAll;
-    }
-
-    /**
-     * Returns the linked list held in mLinksTextAll. The internal list
-     * itself is returned, not a copy.
-     * @return a linked list containing all the link text
-     */
-    public LinkedList getLinkText() {
-        return mLinksTextAll;
-    }
-
-    /**
-     * Adds the enqueued links to the bottom of the page. A one-cell table
-     * with a "Removed Links:" heading is appended to the body node and one
-     * link per enqueued source/text pair is placed in that cell; the two
-     * queues are emptied afterwards. Nothing happens when no body node was
-     * found or when either queue is empty.
-     */
-    private void addEnqueuedLinks() {
-        //Need a body to append to and at least one enqueued link
-        boolean nothingToAdd = mBodyNode == null
-                || mLinksSource.size() == 0
-                || mLinksText.size() == 0;
-        if (nothingToAdd) {
-            return;
-        }
-
-        //Build the surrounding table
-        Element table = mTree.createElement("TABLE");
-        table.setAttribute("cellpadding", "5");
-        table.setAttribute("width", "100%");
-
-        Element row = mTree.createElement("TR");
-        Element cell = mTree.createElement("TD");
-        cell.setAttribute("bgcolor", "white");
-
-        //Centered heading at the top of the cell
-        Element heading = mTree.createElement("H3");
-        heading.appendChild(mTree.createTextNode("Removed Links:"));
-        Element centered = mTree.createElement("CENTER");
-        centered.appendChild(heading);
-
-        cell.appendChild(centered);
-        row.appendChild(cell);
-        table.appendChild(row);
-        mBodyNode.appendChild(table);
-
-        //Add one link (followed by a line break) per enqueued pair
-        Iterator sources = mLinksSource.iterator();
-        Iterator texts = mLinksText.iterator();
-        while (sources.hasNext() && texts.hasNext()) {
-            String source = (String) sources.next();
-            String text = (String) texts.next();
-
-            Element anchor = mTree.createElement("A");
-            anchor.setAttribute("href", source);
-            anchor.setAttribute("style", "color: blue");
-            anchor.appendChild(mTree.createTextNode(text));
-
-            cell.appendChild(anchor);
-            cell.appendChild(mTree.createElement("BR"));
-        }
-
-        //Purge the enqueued Links
-        mLinksSource = new LinkedList();
-        mLinksText = new LinkedList();
-    }//addEnqueuedLinks
-
-
-
-    /** Returns the Document object
-     *
-     * @return the Document object of the DOM tree representing
-     *
-     * the HTML file
-     *
-     */
-
-    public Document getTree() {
-
-        return mTree;
-
-    }
-
-
-
-    /**
-     *
-     * Prints only the text without any of the tags of the DOM tree
-     *
-     * @param iOutputStream the output stream
-     *
-     */
-
-    public void textPrint(OutputStream iOutputStream) {
-
-        PrintWriter output = new PrintWriter(iOutputStream);
-
-        textPrint(mTree, output);
-
-        output.close();
-
-    }//textPrint
-
-
-
-    /**
-     *
-     * Prints only the text without any of the tags of the DOM tree
-     *
-     * @param iDOMTree the DOM Document module to print without any tags
-     *
-     * @param iWriter the PrintWriter
-     *
-     */
-
-    private void textPrint(Node iDOMTree, PrintWriter iWriter) {
-
-        //Print child nodes first
-
-        if (iDOMTree.hasChildNodes()) {
-
-            Node next = iDOMTree.getFirstChild();
-
-
-
-            while (next != null) {
-
-                Node current = next;
-
-                next = current.getNextSibling();
-
-
-
-                //=====Filter out what is not really text=====//
-
-                String name = current.getNodeName();
-
-                boolean valid = true;
-
-
-
-                //Styles should not be treated as text
-
-                if (name.equalsIgnoreCase("STYLE")) valid = false;
-
-                //Scripts should not be treated as text either
-
-                else if (name.equalsIgnoreCase("SCRIPT")) valid = false;
-
-
-
-                //============================================//
-
-
-
-                //Perform recursive function
-
-                if (valid) textPrint(current, iWriter);
-
-            }//while
-
-        }//if
-
-
-
-        //Check to see if the node is a Text node or an element node
-
-        int type = iDOMTree.getNodeType();
-
-
-
-        //Element node
-
-        if (type == Node.ELEMENT_NODE) {
-
-            //if the node is <BR>, then print a line break
-
-            if (iDOMTree.getNodeName().equalsIgnoreCase("BR")) {
-
-                flush(iWriter);
-
-
-
-            }
-
-        }//else if
-
-
-
-        //Text node
-
-        else if (type == Node.TEXT_NODE) {
-
-            //Print the text nodes to the output stream.
-
-            if (!(iDOMTree.getNodeValue().trim().equals(""))) {
-
-                textPrintBuffer += iDOMTree.getNodeValue();
-
-
-
-            }
-
-        }//if
-
-    }//textPrint
-
-
-
-    /**
-     *
-     * Flushs the buffered line and prints it out depending on
-     *
-     * the number of consecutive blank lines. This method also keeps track of
-     *
-     * the number of consecutive blank lines.
-     *
-     * @param iWriter the PrintWriter to flush the buffer to
-     *
-     */
-
-    private void flush(PrintWriter iWriter) {
-
-        boolean blank = false;
-
-
-
-        //Check to see if the buffered line is blank
-
-        if (textPrintBuffer.trim().length() == 0) blank = true;
-
-
-
-        //Make sure there are not too many consecutive blank lines if necessary
-
-        if (limitLinebreaks) {
-
-            if (blank && numberBlankLines < maxLinebreaks) {
-
-                iWriter.println(textPrintBuffer);
-
-                numberBlankLines++;
-
-            }//if
-
-            else if (!blank)
-
-                iWriter.println(textPrintBuffer);
-
-        }//if
-
-        else
-
-            iWriter.println(textPrintBuffer);
-
-
-
-        //Reset the numberBlankLines if the line is not blank
-
-        if (!blank) numberBlankLines = 0;
-
-        textPrintBuffer = "";
-
-    }//flush
-
-
-
-    /**
-     *
-     * Pretty prints the HTML to an OutputStream
-     *
-     * @param iNode the Document to start printing from
-     *
-     * @param iOut the output stream to print to.
-     *
-     */
-
-    public void prettyPrint(Document iNode, OutputStream iOut) {
-
-        //Create formating that will indent and print with the proper
-
-        //method specified by the Document object.
-
-        OutputFormat format = null;
-
-	//according to the java documentation, all compliant JVM's should support the
-	//ISO-8859-1 encoding.
-	format = new OutputFormat(iNode, "ISO-8859-1", true);
-
-        //Get the printer
-        HTMLSerializer printer = new HTMLSerializer(iOut, format);
-
-        try {
-
-            printer.serialize(iNode);
-
-	} catch (UnsupportedEncodingException uue) {
-
-	    System.out.println("Error: your system does not support the ISO-8859-1 encoding.");
-	    uue.printStackTrace();
-
-	} catch (Exception e) {
-
-            e.printStackTrace();
-
-        }//catch
-
-
-
-    }//prettyPrint
-
-
-
-    public static void main(String[] args) {
-
-        if (args.length < 2) {
-
-            System.out.println("Usage: java ContentExtractor [input file] [output file] {settings file}");
-
-            return;
-
-        }
-
-
-
-        FileInputStream streamIn;
-
-        try {
-
-            streamIn = new FileInputStream(args[0]);
-
-        }
-
-        catch (FileNotFoundException e) {
-
-            System.out.println("Input File Not Found");
-
-            return;
-
-        }
-
-        catch (SecurityException e) {
-
-            System.out.println("Read access denied to Input File");
-
-            return;
-
-        }
-
-
-
-
-
-        ContentExtractor ce;
-
-        if (args.length == 2) ce = new ContentExtractor(streamIn);
-
-        else ce = new ContentExtractor(args[2], streamIn);
-
-        ce.extractContent();
-
-
-
-        try {
-
-            File output = new File(args[1]);
-
-            output.createNewFile();
-
-            ce.processNoOverwrite(new File(args[0]),output);
-
-        }
-
-        catch ( IOException e ) {
-
-            System.out.println("IO Exception");
-
-            e.printStackTrace();
-
-            return;
-
-        }
-
-    }
-
-
-
-    /**
-     *
-     * This method returns a JPanel that edits the settings for the filter
-     *
-     * @return a JPanel to edit the settings from.
-     *
-     */
-
-    public ProxyFilterSettings getSettingsGUI() {
-
-        return mSettingsGUI;
-
-    }
-
-
-
-    /**
-     *
-     * Returns what the content type of the file is.
-     *
-     * @return the content type
-     *
-     */
-
-    public String getContentType() {
-
-        if (onlyText) return CONTENT_TEXT;
-
-        else return CONTENT_HTML;
-
-    }//getContentType
-
-
-
-    /**
-     *
-     * This method processes a File and returns the processed file for the
-     *
-     * proxy to use.
-     *
-     * @return the processed file
-     *
-     */
-
-    public File process(File in) throws IOException {
-
-        FileInputStream streamIn = new FileInputStream(in);
-
-        mIn = streamIn;
-
-        extractContent();
-
-        streamIn.close();
-
-
-
-        if (!onlyText) {
-
-            prettyPrint(mTree, new FileOutputStream(in));
-
-        }
-
-        else {
-
-            textPrint(new FileOutputStream(in));
-
-        }
-
-
-
-        return in;
-
-    }
-
-
-
-    /**
-     *
-     * This method processes a File and returns a new file for the
-     *
-     * proxy to use. Note: the file is not overwritten
-     *
-     * @param in the file to process
-     *
-     * @param out the output file
-     *
-     */
-
-    public File processNoOverwrite(File in, File out) throws IOException {
-
-        FileInputStream streamIn = new FileInputStream(in);
-
-        mIn = streamIn;
-
-        extractContent();
-
-        streamIn.close();
-
-
-
-        if (!onlyText) {
-
-            prettyPrint(mTree, new FileOutputStream(out));
-
-        }
-
-        else {
-
-            textPrint(new FileOutputStream(out));
-
-        }
-
-
-
-        return out;
-
-    }
-
-}//ContentExtractor

+	//Instance variables
+	private String mSettingsFile; //the settings file path
+	private Properties mSettings; //the settings properties file
+	private InputStream mIn; //the inputstream to filter
+	private Document mTree; //the DOM tree for HTML
+	private Hashtable mAdServers; //hashtable of ad servers
+	//list of all the removed link sources
+	private LinkedList mLinksSource;
+	private LinkedList mLinksText; //list of all the removed link texts
+	private LinkedList mLinksSourceAll; //list of ALL link sources
+	private LinkedList mLinksTextAll; //list of ALL link texts
+	private LinkedList mImagesSource; //list of ALL image sources
+	private ProxyFilterSettings mSettingsGUI; //the settings editor GUI
+	//boolean to see if children nodes should be checked
+	private boolean mCheckChildren;
+	private String textPrintBuffer; //the line to print when text printing
+	private int numberBlankLines; //the number of consecutive blank lines
+	//the cumulative length of text in a table
+	private int lengthForTableRemover;
+	private Node mBodyNode; //the BODY tag node for the link enqueuer
+
+	//Settings variables
+	public static final String IGNORE_TEXT_LINKS = "Ignore Text Links";
+	private final String IGNORE_TEXT_LINKS_DEF = "true";
+	private boolean ignoreTextLinks;
+
+	public static final String IGNORE_IMAGES = "Ignore Images";
+	private final String IGNORE_IMAGES_DEF = "true";
+	private boolean ignoreImages;
+
+	public static final String IGNORE_SCRIPTS = "Ignore Scripts";
+	private final String IGNORE_SCRIPTS_DEF = "true";
+	private boolean ignoreScripts;
+
+	public static final String IGNORE_STYLES = "Ignore Styles";
+	private final String IGNORE_STYLES_DEF = "false";
+	private boolean ignoreStyles;
+
+	public static final String IGNORE_FORMS = "Ignore Forms";
+	private final String IGNORE_FORMS_DEF = "true";
+	private boolean ignoreForms;
+
+	public static final String IGNORE_META = "Ignore Meta Tags";
+	private final String IGNORE_META_DEF = "true";
+	private boolean ignoreMeta;
+
+	//private final String MINIMUM_TEXT_LENGTH = "Minimum Text Length";
+	//private final String MINIMUM_TEXT_LENGTH_DEF = "0";
+	//private int minTextLength;
+
+	//================================================================
+	//All the settings for link lists - or link cells
+	public static final String IGNORE_LINK_CELLS = "Ignore Link Lists";
+	private final String IGNORE_LINK_CELLS_DEF = "true";
+	private boolean ignoreLinkCells;
+
+	//LC stands for Link Cells
+	public static final String LC_IGNORE_IMAGE_LINKS =
+		"Ignore Image Links in Link Lists";
+	private final String LC_IGNORE_IMAGE_LINKS_DEF = "true";
+	private boolean ignoreLCImageLinks;
+
+	public static final String LC_IGNORE_TEXT_LINKS =
+		"Ignore Text Links in Link Lists";
+	private final String LC_IGNORE_TEXT_LINKS_DEF = "true";
+	private boolean ignoreLCTextLinks;
+
+	public static final String LC_ONLY_LINKS_AND_TEXT =
+		"Ignore Only Links and Text in Link Lists";
+	private final String LC_ONLY_LINKS_AND_TEXT_DEF = "true";
+	private boolean ignoreLCOnlyLinksAndText;
+
+	//End of settings for link lists - or link cells
+	//=================================================================
+
+	public static final String IGNORE_IMAGE_LINKS = "Ignore Image Links";
+	private final String IGNORE_IMAGE_LINKS_DEF = "true";
+	private boolean ignoreImageLinks;
+
+	public static final String IGNORE_INPUT_TAGS = "Ignore <INPUT> Tags";
+	private final String IGNORE_INPUT_TAGS_DEF = "true";
+	private boolean ignoreInputTags;
+
+	public static final String IGNORE_BUTTON_TAGS = "Ignore <BUTTON> Tags";
+	private final String IGNORE_BUTTON_TAGS_DEF = "true";
+	private boolean ignoreButtonTags;
+
+	public static final String IGNORE_SELECT_TAGS = "Ignore <SELECT> Tags";
+	private final String IGNORE_SELECT_TAGS_DEF = "true";
+	private boolean ignoreSelectTags;
+
+	public static final String IGNORE_NOSCRIPT_TAGS = "Ignore <NOSCRIPT> Tags";
+	private final String IGNORE_NOSCRIPT_TAGS_DEF = "true";
+	private boolean ignoreNoscriptTags;
+
+	public static final String IGNORE_CELL_WIDTH = "Ignore Table Cell Widths";
+	private final String IGNORE_CELL_WIDTH_DEF = "false";
+	private boolean ignoreCellWidth;
+
+	public static final String IGNORE_ADS = "Ignore All Advertisements";
+	private final String IGNORE_ADS_DEF = "true";
+	private boolean ignoreAds;
+
+	public static final String ONLY_TEXT = "Print Only Text";
+	private final String ONLY_TEXT_DEF = "false";
+	private boolean onlyText;
+
+	public static final String IGNORE_DIV_STYLES =
+		"Ignore Style Attribute in <DIV> Tags";
+	private final String IGNORE_DIV_STYLES_DEF = "false";
+	private boolean ignoreDivStyles;
+
+	public static final String IGNORE_IFRAME_TAGS = "Ignore <IFRAME> Tags";
+	private final String IGNORE_IFRAME_TAGS_DEF = "false";
+	private boolean ignoreIFrameTags;
+
+	public static final String DISPLAY_IMAGE_ALTS = "Display Image ALTs";
+	private final String DISPLAY_IMAGE_ALTS_DEF = "false";
+	private boolean displayImageAlts;
+
+	public static final String DISPLAY_IMAGE_LINK_ALTS =
+		"Display Image Link ALTs";
+	private final String DISPLAY_IMAGE_LINK_ALTS_DEF = "false";
+	private boolean displayImageLinkAlts;
+
+	//================================================================
+	//Empty Table Settings
+	public static final String REMOVE_EMPTY_TABLES = "Remove Empty Tables";
+	private final String REMOVE_EMPTY_TABLES_DEF = "true";
+	private boolean removeEmptyTables;
+
+	public static final String SUBSTANCE_IMAGE = "<IMG> tags are substance";
+	private final String SUBSTANCE_IMAGE_DEF = "true";
+	private boolean substanceImage;
+
+	public static final String SUBSTANCE_LINKS = "<A> tags are substance";
+	private final String SUBSTANCE_LINKS_DEF = "true";
+	private boolean substanceLinks;
+
+	public static final String SUBSTANCE_IFRAME = "<IFRAME> tags are substance";
+	private final String SUBSTANCE_IFRAME_DEF = "true";
+	private boolean substanceIFrame;
+
+	public static final String SUBSTANCE_INPUT = "<INPUT> tags are substance";
+	private final String SUBSTANCE_INPUT_DEF = "true";
+	private boolean substanceInput;
+
+	public static final String SUBSTANCE_BUTTON = "<BUTTON> tags are substance";
+	private final String SUBSTANCE_BUTTON_DEF = "true";
+	private boolean substanceButton;
+
+	public static final String SUBSTANCE_TEXTAREA =
+		"<TEXTAREA> tags are substance";
+	private final String SUBSTANCE_TEXTAREA_DEF = "true";
+	private boolean substanceTextarea;
+
+	public static final String SUBSTANCE_SELECT = "<SELECT> tags are substance";
+	private final String SUBSTANCE_SELECT_DEF = "true";
+	private boolean substanceSelect;
+
+	public static final String SUBSTANCE_FORM = "<FORM> tags are substance";
+	private final String SUBSTANCE_FORM_DEF = "false";
+	private boolean substanceForm;
+
+	public static final String SUBSTANCE_MIN_TEXT_LENGTH =
+		"Minimum text length as substance";
+	private final String SUBSTANCE_MIN_TEXT_LENGTH_DEF = "1";
+	private int substanceMinTextLength;
+
+	////////////////////////////////////////////////////////////////////////////
+
+	public static final String LIMIT_LINEBREAKS = "Limit Number of Line Breaks";
+	private final String LIMIT_LINEBREAKS_DEF = "true";
+	private boolean limitLinebreaks;
+
+	public static final String MAX_LINEBREAKS = "Maximum Number of Line Breaks";
+	private final String MAX_LINEBREAKS_DEF = "2";
+	private int maxLinebreaks;
+
+	public static final String ADD_LINKS_TO_BOTTOM =
+		"Add removed links to bottom of the page";
+	private final String ADD_LINKS_TO_BOTTOM_DEF = "false";
+	private boolean addLinksToBottom;
+
+	public static final String IGNORE_EMBED_TAGS = "Ignore <EMBED> tags";
+	private final String IGNORE_EMBED_TAGS_DEF = "false";
+	private boolean ignoreEmbedTags;
+
+	/*
+	 * Link/Text ratio is determined by the number of text words per link.
+	 * NOTE(review): a word was documented as 4 letters, but LETTERS_PER_WORD is 5.
+	 */
+	public final static String LINK_TEXT_REMOVAL_RATIO =
+		"Link/Text Removal Ratio";
+	private final String LINK_TEXT_REMOVAL_RATIO_DEF = ".25";
+	private double linkTextRatio;
+
+	//Normal final variables not associated with settings
+	public static final int ALL = 0;
+	public static final int TEXT = 1;
+	public static final int IMAGE = 2;
+	public static final int LETTERS_PER_WORD = 5;
+	public static final String SETTINGS_FILE_DEF = "settings.txt";
+	public static final String AD_FILE = "serverlist.txt";
+	public static final String CONTENT_TEXT = "text/plain";
+	public static final String CONTENT_HTML = "text/html";
+
+	/**
+	 * Creates a new instance with no input stream (null), using the default
+	 * settings file named by SETTINGS_FILE_DEF.
+	 */
+	public ContentExtractor() {
+		this(ContentExtractor.SETTINGS_FILE_DEF, null);
+	}
+
+	/**
+	 * Creates a new instance with no input stream (null) and a settings file
+	 *
+	 * @param iSettings
+	 *            the settings file path
+	 */
+	public ContentExtractor(String iSettings) {
+		this(iSettings, null);
+	}
+
+	/**
+	 * Creates a new instance of ContentExtractor that reads the given stream
+	 * and uses the default settings file named by SETTINGS_FILE_DEF
+	 *
+	 * @param iIn
+	 *            the input stream of the HTML file
+	 */
+	public ContentExtractor(InputStream iIn) {
+		this(ContentExtractor.SETTINGS_FILE_DEF, iIn);
+	}
+
+	/**
+	 * Creates a new instance, loading the settings file and ad server list
+	 *
+	 * @param iSettings
+	 *            the name of the settings file
+	 * @param iIn
+	 *            the input stream of the HTML file (the other constructors may pass null)
+	 */
+	public ContentExtractor(String iSettings, InputStream iIn) {
+		mSettingsFile = iSettings;
+		mSettings = new Properties();
+		mIn = iIn;
+
+		//Read the properties file and ad list, then copy values into the fields
+		loadSettingsProperties();
+		loadAdsServerList();
+		loadSettings();
+		//NOTE(review): 'this' reaches SettingsEditor before the fields below are set
+		mSettingsGUI = new SettingsEditor(this);
+		textPrintBuffer = "";
+		numberBlankLines = 0;
+		mLinksSource = new LinkedList();
+		mLinksText = new LinkedList();
+		mLinksSourceAll = new LinkedList();
+		mLinksTextAll = new LinkedList();
+		mImagesSource = new LinkedList();
+	}
+
+	/** Loads the file named by mSettingsFile into mSettings and closes the
+	 *  stream afterwards. A missing settings file is silently skipped. */
+	private void loadSettingsProperties() {
+		try {
+			FileInputStream in = new FileInputStream(mSettingsFile);
+			try { mSettings.load(in); } finally { in.close(); }
+		} catch (FileNotFoundException e) {
+			//Don't load the settings if the file doesn't exist
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Loads the ad file (AD_FILE), one entry per line, into mAdServers
+	 */
+	public void loadAdsServerList() {
+		mAdServers = new Hashtable();
+		BufferedReader in = null;
+		try {
+			in = new BufferedReader(new FileReader(new File(AD_FILE)));
+			for (String line = in.readLine(); line != null; line = in.readLine())
+				mAdServers.put(line, line);
+		} catch (FileNotFoundException e) {
+			//a missing ad file is not fatal: warn and leave the table empty
+			System.out.println("WARNING: Server list for ad remover not found");
+		} catch (IOException e) {
+			e.printStackTrace();
+		} finally {
+			try {
+				if (in != null) in.close(); //release the file handle
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+	} //loadAdsServerList
+
+	/**
+	 * Returns the current value of a setting; names match case-insensitively.
+	 *
+	 * @param iSetting
+	 *            the name of the setting based on the final variables
+	 * @return the setting as a string: "true"/"false" for boolean settings,
+	 *         the number's text for numeric ones, or null if the name
+	 *         matches no setting
+	 */
+	public String getSetting(String iSetting) {
+		if (iSetting.equalsIgnoreCase(IGNORE_ADS))
+			return Boolean.toString(ignoreAds);
+		else if (iSetting.equalsIgnoreCase(IGNORE_BUTTON_TAGS))
+			return Boolean.toString(ignoreButtonTags);
+		else if (iSetting.equalsIgnoreCase(IGNORE_CELL_WIDTH))
+			return Boolean.toString(ignoreCellWidth);
+		else if (iSetting.equalsIgnoreCase(IGNORE_DIV_STYLES))
+			return Boolean.toString(ignoreDivStyles);
+		else if (iSetting.equalsIgnoreCase(IGNORE_FORMS))
+			return Boolean.toString(ignoreForms);
+		else if (iSetting.equalsIgnoreCase(IGNORE_IFRAME_TAGS))
+			return Boolean.toString(ignoreIFrameTags);
+		else if (iSetting.equalsIgnoreCase(IGNORE_IMAGE_LINKS))
+			return Boolean.toString(ignoreImageLinks);
+		else if (iSetting.equalsIgnoreCase(IGNORE_IMAGES))
+			return Boolean.toString(ignoreImages);
+		else if (iSetting.equalsIgnoreCase(IGNORE_INPUT_TAGS))
+			return Boolean.toString(ignoreInputTags);
+		else if (iSetting.equalsIgnoreCase(IGNORE_LINK_CELLS))
+			return Boolean.toString(ignoreLinkCells);
+		else if (iSetting.equalsIgnoreCase(IGNORE_META))
+			return Boolean.toString(ignoreMeta);
+		else if (iSetting.equalsIgnoreCase(IGNORE_NOSCRIPT_TAGS))
+			return Boolean.toString(ignoreNoscriptTags);
+		else if (iSetting.equalsIgnoreCase(IGNORE_SCRIPTS))
+			return Boolean.toString(ignoreScripts);
+		else if (iSetting.equalsIgnoreCase(IGNORE_SELECT_TAGS))
+			return Boolean.toString(ignoreSelectTags);
+		else if (iSetting.equalsIgnoreCase(IGNORE_STYLES))
+			return Boolean.toString(ignoreStyles);
+		else if (iSetting.equalsIgnoreCase(IGNORE_TEXT_LINKS))
+			return Boolean.toString(ignoreTextLinks);
+		else if (iSetting.equalsIgnoreCase(LC_IGNORE_IMAGE_LINKS))
+			return Boolean.toString(ignoreLCImageLinks);
+		else if (iSetting.equalsIgnoreCase(LC_IGNORE_TEXT_LINKS))
+			return Boolean.toString(ignoreLCTextLinks);
+		else if (iSetting.equalsIgnoreCase(LINK_TEXT_REMOVAL_RATIO))
+			return Double.toString(linkTextRatio);
+		else if (iSetting.equalsIgnoreCase(ONLY_TEXT))
+			return Boolean.toString(onlyText);
+		else if (iSetting.equalsIgnoreCase(LC_ONLY_LINKS_AND_TEXT))
+			return Boolean.toString(ignoreLCOnlyLinksAndText);
+		else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_ALTS))
+			return Boolean.toString(displayImageAlts);
+		else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_LINK_ALTS))
+			return Boolean.toString(displayImageLinkAlts);
+		else if (iSetting.equalsIgnoreCase(REMOVE_EMPTY_TABLES))
+			return Boolean.toString(removeEmptyTables);
+		else if (iSetting.equalsIgnoreCase(LIMIT_LINEBREAKS))
+			return Boolean.toString(limitLinebreaks);
+		else if (iSetting.equalsIgnoreCase(MAX_LINEBREAKS))
+			return Integer.toString(maxLinebreaks);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_BUTTON))
+			return Boolean.toString(substanceButton);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_FORM))
+			return Boolean.toString(substanceForm);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_IFRAME))
+			return Boolean.toString(substanceIFrame);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_IMAGE))
+			return Boolean.toString(substanceImage);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_INPUT))
+			return Boolean.toString(substanceInput);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_LINKS))
+			return Boolean.toString(substanceLinks);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_MIN_TEXT_LENGTH))
+			return Integer.toString(substanceMinTextLength);
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_SELECT))
+			return Boolean.toString(substanceSelect);
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_TEXTAREA))
+			return Boolean.toString(substanceTextarea);
+
+		else if (iSetting.equalsIgnoreCase(ADD_LINKS_TO_BOTTOM))
+			return Boolean.toString(addLinksToBottom);
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_EMBED_TAGS))
+			return Boolean.toString(ignoreEmbedTags);
+
+		return null;
+
+	} //getSetting
+
+	/**
+	 * Sets a setting based on the final variables. The name is matched
+	 * case-insensitively; a name that matches no setting is ignored.
+	 *
+	 * @param iSetting
+	 *            the name of the setting based on the final variables
+	 *
+	 * @param iValue
+	 *            the desired value of the setting. A boolean setting becomes
+	 *            true only for exactly "true"; numeric settings are parsed
+	 *            with Double.parseDouble / Integer.parseInt
+	 */
+
+	public void changeSetting(String iSetting, String iValue) {
+
+		if (iSetting.equalsIgnoreCase(IGNORE_ADS))
+			ignoreAds = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_BUTTON_TAGS))
+			ignoreButtonTags = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_CELL_WIDTH))
+			ignoreCellWidth = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_DIV_STYLES))
+			ignoreDivStyles = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_FORMS))
+			ignoreForms = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_IFRAME_TAGS))
+			ignoreIFrameTags = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_IMAGE_LINKS))
+			ignoreImageLinks = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_IMAGES))
+			ignoreImages = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_INPUT_TAGS))
+			ignoreInputTags = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_LINK_CELLS))
+			ignoreLinkCells = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_META))
+			ignoreMeta = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_NOSCRIPT_TAGS))
+			ignoreNoscriptTags = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_SCRIPTS))
+			ignoreScripts = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_SELECT_TAGS))
+			ignoreSelectTags = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_STYLES))
+			ignoreStyles = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_TEXT_LINKS))
+			ignoreTextLinks = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(LC_IGNORE_IMAGE_LINKS))
+			ignoreLCImageLinks = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(LC_IGNORE_TEXT_LINKS))
+			ignoreLCTextLinks = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(LINK_TEXT_REMOVAL_RATIO))
+			linkTextRatio = Double.parseDouble(iValue);
+
+		else if (iSetting.equalsIgnoreCase(ONLY_TEXT))
+			onlyText = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(LC_ONLY_LINKS_AND_TEXT))
+			ignoreLCOnlyLinksAndText = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_ALTS))
+			displayImageAlts = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(DISPLAY_IMAGE_LINK_ALTS))
+			displayImageLinkAlts = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(REMOVE_EMPTY_TABLES))
+			removeEmptyTables = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(LIMIT_LINEBREAKS))
+			limitLinebreaks = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(MAX_LINEBREAKS))
+			maxLinebreaks = Integer.parseInt(iValue);
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_BUTTON))
+			substanceButton = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_FORM))
+			substanceForm = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_IFRAME))
+			substanceIFrame = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_IMAGE))
+			substanceImage = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_INPUT))
+			substanceInput = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_LINKS))
+			substanceLinks = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_MIN_TEXT_LENGTH))
+			substanceMinTextLength = Integer.parseInt(iValue);
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_SELECT))
+			substanceSelect = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(SUBSTANCE_TEXTAREA))
+			substanceTextarea = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(ADD_LINKS_TO_BOTTOM))
+			addLinksToBottom = iValue.equals("true");
+
+		else if (iSetting.equalsIgnoreCase(IGNORE_EMBED_TAGS))
+			ignoreEmbedTags = iValue.equals("true");
+
+	} //changeSetting
+
+	/**
+	 * Copies every setting from mSettings into its field, falling back to
+	 * the setting's own *_DEF default when the key is absent. A boolean
+	 * setting is true only when the stored value is exactly "true".
+	 */
+
+	public void loadSettings() {
+
+		ignoreTextLinks =
+			(
+				mSettings.getProperty(
+					IGNORE_TEXT_LINKS,
+					IGNORE_TEXT_LINKS_DEF)).equals(
+				"true");
+
+		ignoreImageLinks =
+			(
+				mSettings.getProperty(
+					IGNORE_IMAGE_LINKS,
+					IGNORE_IMAGE_LINKS_DEF)).equals(
+				"true");
+
+		ignoreImages =
+			(mSettings.getProperty(IGNORE_IMAGES, IGNORE_IMAGES_DEF)).equals(
+				"true");
+
+		ignoreScripts =
+			(mSettings.getProperty(IGNORE_SCRIPTS, IGNORE_SCRIPTS_DEF)).equals(
+				"true");
+
+		ignoreStyles =
+			(mSettings.getProperty(IGNORE_STYLES, IGNORE_STYLES_DEF)).equals(
+				"true");
+
+		ignoreForms =
+			(mSettings.getProperty(IGNORE_FORMS, IGNORE_FORMS_DEF)).equals(
+				"true");
+
+		ignoreMeta =
+			(mSettings.getProperty(IGNORE_META, IGNORE_META_DEF)).equals(
+				"true");
+
+		ignoreLinkCells =
+			(
+				mSettings.getProperty(
+					IGNORE_LINK_CELLS,
+					IGNORE_LINK_CELLS_DEF)).equals(
+				"true");
+
+		ignoreLCImageLinks =
+			(
+				mSettings.getProperty(
+					LC_IGNORE_IMAGE_LINKS,
+					LC_IGNORE_IMAGE_LINKS_DEF)).equals(
+				"true");
+
+		ignoreLCTextLinks =
+			(
+				mSettings.getProperty(
+					LC_IGNORE_TEXT_LINKS,
+					LC_IGNORE_TEXT_LINKS_DEF)).equals(
+				"true");
+
+		linkTextRatio =
+			Double.parseDouble(
+				mSettings.getProperty(
+					LINK_TEXT_REMOVAL_RATIO,
+					LINK_TEXT_REMOVAL_RATIO_DEF));
+
+		ignoreButtonTags =
+			(
+				mSettings.getProperty(
+					IGNORE_BUTTON_TAGS,
+					IGNORE_BUTTON_TAGS_DEF)).equals(
+				"true");
+
+		ignoreInputTags =
+			(
+				mSettings.getProperty(
+					IGNORE_INPUT_TAGS,
+					IGNORE_INPUT_TAGS_DEF)).equals(
+				"true");
+
+		ignoreSelectTags =
+			(
+				mSettings.getProperty(
+					IGNORE_SELECT_TAGS,
+					IGNORE_SELECT_TAGS_DEF)).equals(
+				"true");
+
+		ignoreNoscriptTags =
+			(
+				mSettings.getProperty(
+					IGNORE_NOSCRIPT_TAGS,
+					IGNORE_NOSCRIPT_TAGS_DEF)).equals(
+				"true");
+
+		ignoreCellWidth =
+			(
+				mSettings.getProperty(
+					IGNORE_CELL_WIDTH,
+					IGNORE_CELL_WIDTH_DEF)).equals(
+				"true");
+
+		ignoreAds =
+			(mSettings.getProperty(IGNORE_ADS, IGNORE_ADS_DEF)).equals("true");
+
+		onlyText =
+			(mSettings.getProperty(ONLY_TEXT, ONLY_TEXT_DEF)).equals("true");
+
+		ignoreIFrameTags =
+			(
+				mSettings.getProperty(
+					IGNORE_IFRAME_TAGS,
+					IGNORE_IFRAME_TAGS_DEF)).equals(
+				"true");
+
+		ignoreDivStyles =
+			(
+				mSettings.getProperty(
+					IGNORE_DIV_STYLES,
+					IGNORE_DIV_STYLES_DEF)).equals(
+				"true");
+
+		ignoreLCOnlyLinksAndText =
+			(
+				mSettings.getProperty(
+					LC_ONLY_LINKS_AND_TEXT,
+					LC_ONLY_LINKS_AND_TEXT_DEF)).equals(
+				"true");
+
+		displayImageAlts =
+			(
+				mSettings.getProperty(
+					DISPLAY_IMAGE_ALTS,
+					DISPLAY_IMAGE_ALTS_DEF)).equals(
+				"true");
+
+		displayImageLinkAlts =
+			(
+				mSettings.getProperty(
+					DISPLAY_IMAGE_LINK_ALTS,
+					DISPLAY_IMAGE_LINK_ALTS_DEF)).equals(
+				"true");
+
+		removeEmptyTables =
+			(
+				mSettings.getProperty(
+					REMOVE_EMPTY_TABLES,
+					REMOVE_EMPTY_TABLES_DEF)).equals(
+				"true");
+
+		limitLinebreaks =
+			(
+				mSettings.getProperty(
+					LIMIT_LINEBREAKS,
+					LIMIT_LINEBREAKS_DEF)).equals(
+				"true");
+
+		maxLinebreaks =
+			Integer.parseInt(
+				mSettings.getProperty(MAX_LINEBREAKS, MAX_LINEBREAKS_DEF));
+
+		substanceButton =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_BUTTON,
+					SUBSTANCE_BUTTON_DEF)).equals(
+				"true");
+
+		substanceForm =
+			(mSettings.getProperty(SUBSTANCE_FORM, SUBSTANCE_FORM_DEF)).equals(
+				"true");
+
+		substanceIFrame =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_IFRAME,
+					SUBSTANCE_IFRAME_DEF)).equals(
+				"true");
+
+		substanceImage =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_IMAGE,
+					SUBSTANCE_IMAGE_DEF)).equals(
+				"true");
+
+		substanceInput =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_INPUT,
+					SUBSTANCE_INPUT_DEF)).equals(
+				"true");
+
+		substanceLinks =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_LINKS,
+					SUBSTANCE_LINKS_DEF)).equals(
+				"true");
+
+		substanceMinTextLength =
+			Integer.parseInt(
+				mSettings.getProperty(
+					SUBSTANCE_MIN_TEXT_LENGTH,
+					SUBSTANCE_MIN_TEXT_LENGTH_DEF));
+
+		substanceSelect =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_SELECT,
+					SUBSTANCE_SELECT_DEF)).equals(
+				"true");
+
+		substanceTextarea =
+			(
+				mSettings.getProperty(
+					SUBSTANCE_TEXTAREA,
+					SUBSTANCE_TEXTAREA_DEF)).equals(
+				"true");
+
+		addLinksToBottom =
+			(
+				mSettings.getProperty(
+					ADD_LINKS_TO_BOTTOM,
+					ADD_LINKS_TO_BOTTOM_DEF)).equals(
+				"true");
+
+		ignoreEmbedTags =
+			(
+				mSettings.getProperty(
+					IGNORE_EMBED_TAGS,
+					IGNORE_EMBED_TAGS_DEF)).equals(
+				"true");
+
+	} //loadSettings
+
+	/**
+	 * Copies the current value of every filter field into mSettings, stored
+	 * as strings under the corresponding property keys. Nothing is written
+	 * to disk here.
+	 */
+	private void saveProperties() {
+
+		//Link, image and markup removal switches
+		mSettings.setProperty(IGNORE_TEXT_LINKS, String.valueOf(ignoreTextLinks));
+		mSettings.setProperty(IGNORE_IMAGE_LINKS, String.valueOf(ignoreImageLinks));
+		mSettings.setProperty(IGNORE_IMAGES, String.valueOf(ignoreImages));
+		mSettings.setProperty(IGNORE_SCRIPTS, String.valueOf(ignoreScripts));
+		mSettings.setProperty(IGNORE_STYLES, String.valueOf(ignoreStyles));
+		mSettings.setProperty(IGNORE_FORMS, String.valueOf(ignoreForms));
+		mSettings.setProperty(IGNORE_META, String.valueOf(ignoreMeta));
+
+		//Link cell settings
+		mSettings.setProperty(IGNORE_LINK_CELLS, String.valueOf(ignoreLinkCells));
+		mSettings.setProperty(LC_IGNORE_IMAGE_LINKS, String.valueOf(ignoreLCImageLinks));
+		mSettings.setProperty(LC_IGNORE_TEXT_LINKS, String.valueOf(ignoreLCTextLinks));
+		mSettings.setProperty(LINK_TEXT_REMOVAL_RATIO, Double.toString(linkTextRatio));
+
+		//Tag and attribute removal switches
+		mSettings.setProperty(IGNORE_BUTTON_TAGS, String.valueOf(ignoreButtonTags));
+		mSettings.setProperty(IGNORE_INPUT_TAGS, String.valueOf(ignoreInputTags));
+		mSettings.setProperty(IGNORE_SELECT_TAGS, String.valueOf(ignoreSelectTags));
+		mSettings.setProperty(IGNORE_NOSCRIPT_TAGS, String.valueOf(ignoreNoscriptTags));
+		mSettings.setProperty(IGNORE_CELL_WIDTH, String.valueOf(ignoreCellWidth));
+		mSettings.setProperty(IGNORE_ADS, String.valueOf(ignoreAds));
+		mSettings.setProperty(ONLY_TEXT, String.valueOf(onlyText));
+		mSettings.setProperty(IGNORE_IFRAME_TAGS, String.valueOf(ignoreIFrameTags));
+		mSettings.setProperty(IGNORE_DIV_STYLES, String.valueOf(ignoreDivStyles));
+		mSettings.setProperty(LC_ONLY_LINKS_AND_TEXT, String.valueOf(ignoreLCOnlyLinksAndText));
+
+		//Replacement text and layout clean-up
+		mSettings.setProperty(DISPLAY_IMAGE_ALTS, String.valueOf(displayImageAlts));
+		mSettings.setProperty(DISPLAY_IMAGE_LINK_ALTS, String.valueOf(displayImageLinkAlts));
+		mSettings.setProperty(REMOVE_EMPTY_TABLES, String.valueOf(removeEmptyTables));
+		mSettings.setProperty(LIMIT_LINEBREAKS, String.valueOf(limitLinebreaks));
+		mSettings.setProperty(MAX_LINEBREAKS, Integer.toString(maxLinebreaks));
+
+		//What counts as "substance" when deciding whether a table is empty
+		mSettings.setProperty(SUBSTANCE_BUTTON, String.valueOf(substanceButton));
+		mSettings.setProperty(SUBSTANCE_FORM, String.valueOf(substanceForm));
+		mSettings.setProperty(SUBSTANCE_IFRAME, String.valueOf(substanceIFrame));
+		mSettings.setProperty(SUBSTANCE_IMAGE, String.valueOf(substanceImage));
+		mSettings.setProperty(SUBSTANCE_INPUT, String.valueOf(substanceInput));
+		mSettings.setProperty(SUBSTANCE_LINKS, String.valueOf(substanceLinks));
+		mSettings.setProperty(
+			SUBSTANCE_MIN_TEXT_LENGTH,
+			Integer.toString(substanceMinTextLength));
+		mSettings.setProperty(SUBSTANCE_SELECT, String.valueOf(substanceSelect));
+		mSettings.setProperty(SUBSTANCE_TEXTAREA, String.valueOf(substanceTextarea));
+
+		//Remaining switches
+		mSettings.setProperty(ADD_LINKS_TO_BOTTOM, String.valueOf(addLinksToBottom));
+		mSettings.setProperty(IGNORE_EMBED_TAGS, String.valueOf(ignoreEmbedTags));
+
+	} //saveProperties
+
+	/**
+	 *
+	 * Save the settings file
+	 *
+	 */
+
+	public void saveSettings() {
+
+		saveProperties();
+
+		try {
+
+			mSettings.store(
+				new FileOutputStream(new File(mSettingsFile)),
+				"Content Extractor Settings File");
+
+		} catch (Exception e) {
+
+			e.printStackTrace();
+
+		}
+
+	}
+
+	/**
+	 * Parses the HTML read from mIn into a DOM tree (kept in mTree) and runs
+	 * the filters over it according to the current settings.
+	 *
+	 * When addLinksToBottom is set, the enqueued links are appended to the
+	 * page afterwards. Exceptions raised while parsing or filtering are
+	 * printed and not propagated.
+	 */
+	public void extractContent() {
+
+		HTMLParser htmlParser = new HTMLParser();
+
+		try {
+
+			//The input is always decoded as ISO-8859-1
+			InputSource source =
+				new InputSource(new InputStreamReader(mIn, "ISO-8859-1"));
+
+			htmlParser.parse(source);
+
+			mTree = htmlParser.getDocument();
+
+			extract(mTree);
+
+			//Appends the links to the bottom of the page
+			if (addLinksToBottom) {
+				addEnqueuedLinks();
+			}
+
+		} catch (Exception e) {
+
+			e.printStackTrace();
+
+		}
+
+	} //extractContent
+
+	/**
+	 * Runs extractContent() and afterwards writes the ONLY_TEXT setting back
+	 * to the value it had before the call.
+	 *
+	 * NOTE(review): nothing in this method switches ONLY_TEXT on before the
+	 * extraction runs - confirm whether that is expected of the caller.
+	 */
+	public void extractContentAsText() {
+		final String previousOnlyText = getSetting(ONLY_TEXT);
+
+		extractContent();
+
+		changeSetting(ONLY_TEXT, previousOnlyText);
+	}
+
+	/**
+	 * Runs every direct child of a node through filterNode(), which in turn
+	 * recurses into the children that survive.
+	 *
+	 * The children are walked by sibling pointer, and the next sibling is
+	 * fetched before the current child is filtered. Filtering may remove the
+	 * current child from the tree; iterating a live NodeList by index with a
+	 * cached length would then skip nodes and eventually hand a null item to
+	 * filterNode().
+	 *
+	 * @param iNode
+	 *            the node to start checking
+	 */
+	private void extract(Node iNode) {
+
+		Node child = iNode.getFirstChild();
+
+		while (child != null) {
+
+			//Remember the successor first: child may be detached below
+			Node following = child.getNextSibling();
+
+			filterNode(child);
+
+			child = following;
+
+		}
+
+	} //extract
+
+	/**
+	 * Filters a single node and, unless a filter vetoed it, its children.
+	 *
+	 * mCheckChildren is reset to true before the filters run; a filter that
+	 * has disposed of the whole subtree sets it to false to stop the descent.
+	 *
+	 * @param iNode
+	 *            the node to filter
+	 */
+	private void filterNode(Node iNode) {
+
+		//Assume the children need filtering until a filter says otherwise
+		mCheckChildren = true;
+
+		passThroughFilters(iNode);
+
+		if (!mCheckChildren) {
+			return;
+		}
+
+		filterChildren(iNode);
+
+	} //filterNode
+
+	/**
+	 * Passes a node through the set of filters.
+	 *
+	 * Only element nodes are acted upon; there are no text filters so far.
+	 * An element is first recorded (links and images), then edited in place
+	 * (width and style attributes), and finally tested against the removal
+	 * rules, of which at most one applies. Rules that dispose of the whole
+	 * subtree set mCheckChildren to false so that filterNode() does not
+	 * descend into it.
+	 *
+	 * @param iNode
+	 *            the node to filter
+	 */
+	private void passThroughFilters(Node iNode) {
+
+		if (iNode.getNodeType() != Node.ELEMENT_NODE)
+			return;
+
+		Node parent = iNode.getParentNode();
+
+		String name = iNode.getNodeName();
+
+		//================================================================
+		// Conditions that just check the node without editing or deleting
+		//================================================================
+
+		if (isLink(iNode))
+			recordLink(iNode);
+
+		//The original had a stray ';' after this condition, which made the
+		//call unconditional
+		if (isImage(iNode))
+			recordImage(iNode);
+
+		//================================================================
+		// Conditions that edit the node but don't delete it
+		//================================================================
+
+		//<TD|TABLE width=*> removes widths
+		if ((name.equalsIgnoreCase("TD") || name.equalsIgnoreCase("TABLE"))
+			&& ignoreCellWidth) {
+
+			if (hasAttribute(iNode, "width"))
+				removeAttribute(iNode, "width");
+
+		} //if
+
+		//<DIV style=*> removes style
+		if (name.equalsIgnoreCase("DIV") && ignoreDivStyles) {
+
+			if (hasAttribute(iNode, "style"))
+				removeAttribute(iNode, "style");
+
+		} //if
+
+		//================================================================
+		// Conditions that DELETE nodes from the DOM tree
+		//================================================================
+
+		//Anything pointing at a known ad server
+		if (isAdLink(iNode) && ignoreAds) {
+
+			parent.removeChild(iNode);
+
+			mCheckChildren = false;
+
+		}
+
+		//<TD> with Link/Text Ratio higher than threshold
+		else if (name.equalsIgnoreCase("TD") && ignoreLinkCells) {
+
+			testRemoveCell(iNode);
+
+		}
+
+		//<A HREF> with no Images
+		else if (isTextLink(iNode) && ignoreTextLinks) {
+
+			parent.removeChild(iNode);
+
+			if (addLinksToBottom)
+				enqueueLink(iNode);
+
+			mCheckChildren = false;
+
+		}
+
+		//<A HREF> with Images (the node is either the link or its image)
+		else if (isImageLink(iNode) && ignoreImageLinks) {
+
+			if (displayImageLinkAlts) {
+
+				//Only the <IMG> itself is replaced here; when the node is
+				//the <A>, nothing happens until its image child is visited
+				if (isImage(iNode)) {
+
+					Node alt = createImageLinkAltNode(iNode);
+
+					//Put the replacement in front of the enclosing link
+					if (alt != null)
+						parent.getParentNode().insertBefore(alt, parent);
+
+					parent.removeChild(iNode);
+
+					//Only remove the link if there are no more children
+					//to prevent NullPointerExceptions
+					if (!parent.hasChildNodes())
+						parent.getParentNode().removeChild(parent);
+
+				} //if
+
+			} //if
+
+			else
+				parent.removeChild(iNode);
+
+		}
+
+		//<IMG*> that is not part of a link
+		else if (
+			name.equalsIgnoreCase("IMG")
+				&& ignoreImages
+				&& !isImageLink(iNode)) {
+
+			if (displayImageAlts) {
+
+				Node alt = createAltNode(iNode);
+
+				if (alt != null)
+					parent.insertBefore(alt, iNode);
+
+			} //if
+
+			parent.removeChild(iNode);
+
+		}
+
+		//<SCRIPT>, <NOSCRIPT>, <STYLE>, <META>, <FORM>, <INPUT>, <BUTTON>,
+		//<SELECT>, <IFRAME>, <EMBED> whose setting is switched on
+		else if (isSimplyIgnoredTag(name)) {
+
+			parent.removeChild(iNode);
+
+			mCheckChildren = false;
+
+		}
+
+		//<NOSCRIPT> removal and save children
+		else if (name.equalsIgnoreCase("NOSCRIPT") && ignoreScripts) {
+
+			Node current = iNode.getFirstChild();
+
+			while (current != null) {
+
+				Node next = current.getNextSibling();
+
+				//reinsert child before NOSCRIPT node
+				parent.insertBefore(current, iNode);
+
+				current = next;
+
+			} //while
+
+			parent.removeChild(iNode);
+
+		} //else if
+
+		//<TABLE>
+		else if (name.equalsIgnoreCase("TABLE") && removeEmptyTables) {
+
+			//Filters the table's children itself, hence no further descent
+			removeEmptyTables(iNode);
+
+			mCheckChildren = false;
+
+		} //else if
+
+		//<BODY>
+		else if (name.equalsIgnoreCase("BODY"))
+			mBodyNode = iNode;
+
+	} //passThroughFilters
+
+	/**
+	 * Tells whether an element is one of the tags that are dropped together
+	 * with their whole subtree purely on the basis of their name and the
+	 * matching setting.
+	 *
+	 * @param iName
+	 *            the element name
+	 * @return true if the element should be removed
+	 */
+	private boolean isSimplyIgnoredTag(String iName) {
+
+		return (iName.equalsIgnoreCase("SCRIPT") && ignoreScripts)
+			|| (iName.equalsIgnoreCase("NOSCRIPT") && ignoreNoscriptTags)
+			|| (iName.equalsIgnoreCase("STYLE") && ignoreStyles)
+			|| (iName.equalsIgnoreCase("META") && ignoreMeta)
+			|| (iName.equalsIgnoreCase("FORM") && ignoreForms)
+			|| (iName.equalsIgnoreCase("INPUT") && ignoreInputTags)
+			|| (iName.equalsIgnoreCase("BUTTON") && ignoreButtonTags)
+			|| (iName.equalsIgnoreCase("SELECT") && ignoreSelectTags)
+			|| (iName.equalsIgnoreCase("IFRAME") && ignoreIFrameTags)
+			|| (iName.equalsIgnoreCase("EMBED") && ignoreEmbedTags);
+
+	} //isSimplyIgnoredTag
+
+	/**
+	 * Runs filterNode() on each direct child of a node.
+	 *
+	 * The following sibling is looked up before a child is filtered, because
+	 * filtering can remove that child from the tree.
+	 *
+	 * @param iNode
+	 *            the node whose children are filtered
+	 */
+	private void filterChildren(Node iNode) {
+
+		Node child = iNode.getFirstChild();
+
+		while (child != null) {
+
+			Node following = child.getNextSibling();
+
+			filterNode(child);
+
+			child = following;
+
+		} //while
+
+	} //filterChildren
+
+	/**
+	 * Filters the contents of a table and then removes the table from its
+	 * parent if nothing of substance is left in it.
+	 *
+	 * @param iNode
+	 *            the table node to examine
+	 */
+	private void removeEmptyTables(Node iNode) {
+
+		//Filter the children first so that emptiness is judged on what
+		//survives the other filters
+		filterChildren(iNode);
+
+		//Restart the running text length used by processEmptyTable()
+		lengthForTableRemover = 0;
+
+		if (processEmptyTable(iNode)) {
+			iNode.getParentNode().removeChild(iNode);
+		}
+
+	} //removeEmptyTables
+
+	/**
+	 * Recursively decides whether a subtree is empty.
+	 *
+	 * An element counts as substance when its tag is IMG, A, BUTTON, FORM,
+	 * IFRAME, INPUT, SELECT or TEXTAREA and the matching substance setting
+	 * is on. Text counts once the trimmed length accumulated in
+	 * lengthForTableRemover reaches substanceMinTextLength. The search stops
+	 * at the first substance found.
+	 *
+	 * @param iNode
+	 *            the node to recursively check.
+	 * @return true if the nodes are empty, false if they are not
+	 */
+	private boolean processEmptyTable(Node iNode) {
+
+		int nodeType = iNode.getNodeType();
+
+		String tag = iNode.getNodeName();
+
+		if (nodeType == Node.ELEMENT_NODE) {
+
+			boolean substance =
+				(tag.equalsIgnoreCase("IMG") && substanceImage)
+					|| (tag.equalsIgnoreCase("A") && substanceLinks)
+					|| (tag.equalsIgnoreCase("BUTTON") && substanceButton)
+					|| (tag.equalsIgnoreCase("FORM") && substanceForm)
+					|| (tag.equalsIgnoreCase("IFRAME") && substanceIFrame)
+					|| (tag.equalsIgnoreCase("INPUT") && substanceInput)
+					|| (tag.equalsIgnoreCase("SELECT") && substanceSelect)
+					|| (tag.equalsIgnoreCase("TEXTAREA") && substanceTextarea);
+
+			if (substance)
+				return false;
+
+		} else if (nodeType == Node.TEXT_NODE) {
+
+			//Whitespace around the text does not count
+			lengthForTableRemover += iNode.getNodeValue().trim().length();
+
+			if (lengthForTableRemover >= substanceMinTextLength)
+				return false;
+
+		}
+
+		//The node itself is empty; it stays empty only if every child is
+		Node child = iNode.getFirstChild();
+
+		while (child != null) {
+
+			Node following = child.getNextSibling();
+
+			if (!processEmptyTable(child))
+				return false;
+
+			child = following;
+
+		} //while
+
+		return true;
+
+	} //processEmptyTable
+
+	/**
+	 * Builds the replacement for an image that is used as a link:
+	 * <B>[<A href=src>Image</A> | <I><A href=link>alt</A></I>]</B>, i.e. one
+	 * link to the image itself and one to the target of the original link.
+	 *
+	 * The second link is labelled with the image's ALT text, or "-Link-"
+	 * when the ALT attribute is missing or blank.
+	 *
+	 * @param iNode
+	 *            the <IMG> node that is within the <A> tag
+	 * @return the new node, or null if the node is not an image inside a
+	 *         link, or if the image source or the link target is missing or
+	 *         blank
+	 */
+	private Node createImageLinkAltNode(Node iNode) {
+
+		//Make sure it is an image link and an image
+		if (!isImage(iNode) || !isImageLink(iNode))
+			return null;
+
+		NamedNodeMap imageAttrs = iNode.getAttributes();
+
+		//Label: the ALT text if there is any. Blank values are detected by
+		//length; comparing strings with == tests identity, not content.
+		String altTag = "-Link-";
+
+		Node altAttr = imageAttrs.getNamedItem("alt");
+
+		if (altAttr != null) {
+
+			String altValue = altAttr.getNodeValue();
+
+			if (altValue != null && altValue.trim().length() > 0)
+				altTag = altValue;
+
+		} //if
+
+		//Determine the source of the image
+		Node srcAttr = imageAttrs.getNamedItem("src");
+
+		if (srcAttr == null)
+			return null;
+
+		String imageSource = srcAttr.getNodeValue();
+
+		if (imageSource == null || imageSource.trim().length() == 0)
+			return null;
+
+		//Determine the href of the link
+		Node hrefAttr =
+			iNode.getParentNode().getAttributes().getNamedItem("href");
+
+		if (hrefAttr == null)
+			return null;
+
+		String linkHref = hrefAttr.getNodeValue();
+
+		if (linkHref == null || linkHref.trim().length() == 0)
+			return null;
+
+		//CONSTRUCT REPLACEMENT NODE
+		Element bold = mTree.createElement("B");
+
+		Element imageLink = mTree.createElement("A");
+
+		imageLink.setAttribute("href", imageSource);
+
+		imageLink.appendChild(mTree.createTextNode("Image"));
+
+		Element altLink = mTree.createElement("A");
+
+		altLink.setAttribute("href", linkHref);
+
+		altLink.appendChild(mTree.createTextNode(altTag));
+
+		Element italic = mTree.createElement("I");
+
+		italic.appendChild(altLink);
+
+		//Link together nodes
+		bold.appendChild(mTree.createTextNode("["));
+
+		bold.appendChild(imageLink);
+
+		bold.appendChild(mTree.createTextNode(" | "));
+
+		bold.appendChild(italic);
+
+		bold.appendChild(mTree.createTextNode("]"));
+
+		return bold;
+
+	} //createImageLinkAltNode
+
+	/**
+	 * Builds the replacement for a plain image: a link to the image source
+	 * whose text is the image's ALT text in bold square brackets.
+	 *
+	 * @param iNode
+	 *            the image node
+	 * @return the node to add to the DOM tree, or null if the node isn't an
+	 *         image, has no SRC attribute, or has a missing or blank ALT
+	 *         attribute
+	 */
+	private Node createAltNode(Node iNode) {
+
+		if (!isImage(iNode))
+			return null;
+
+		//Determine if there is a usable ALT text
+		Node attr = iNode.getAttributes().getNamedItem("alt");
+
+		if (attr == null)
+			return null;
+
+		//Blank values are detected by length; comparing strings with ==
+		//tests identity, not content
+		String altText = attr.getNodeValue();
+
+		if (altText == null || altText.trim().length() == 0)
+			return null;
+
+		//Determine if there is a src
+		Node attrLink = iNode.getAttributes().getNamedItem("src");
+
+		if (attrLink == null)
+			return null;
+
+		//Create new link node pointing at the image
+		Element altNode = mTree.createElement("A");
+
+		altNode.setAttribute("href", attrLink.getNodeValue());
+
+		//Bold Element holding the bracketed ALT text
+		Node bold = mTree.createElement("B");
+
+		bold.appendChild(mTree.createTextNode("[" + altText + "]"));
+
+		altNode.appendChild(bold);
+
+		altNode.setNodeValue("");
+
+		return altNode;
+
+	} //createAltNode
+
+	/**
+	 * Determines if a node points at an advertisement server.
+	 *
+	 * The address is taken from the node's href attribute, or from src when
+	 * there is no href, and its host is looked up in mAdServers.
+	 *
+	 * @param iNode
+	 *            the node to check for ads
+	 * @return true if the address' host is listed in mAdServers; false if it
+	 *         is not, if the node has neither attribute, or if the address
+	 *         cannot be parsed as a URL
+	 */
+	private boolean isAdLink(Node iNode) {
+
+		//Prefer href, fall back to src
+		String attrName;
+
+		if (hasAttribute(iNode, "href")) {
+			attrName = "href";
+		} else if (hasAttribute(iNode, "src")) {
+			attrName = "src";
+		} else {
+			return false;
+		}
+
+		String address =
+			iNode.getAttributes().getNamedItem(attrName).getNodeValue();
+
+		boolean ad = false;
+
+		try {
+
+			String host = new URL(address).getHost();
+
+			ad = mAdServers.get(host) != null;
+
+		} catch (Exception e) {
+
+			//Deliberately ignored: an address that is not a well-formed
+			//URL is treated as not being an advertisement
+
+		} //catch
+
+		return ad;
+
+	} //isAdLink
+
+	/**
+	 * Removes an attribute from an Element node if the attribute exists;
+	 * does nothing otherwise.
+	 *
+	 * @param iNode
+	 *            the node
+	 * @param iAttr
+	 *            the name of the attribute
+	 */
+	private void removeAttribute(Node iNode, String iAttr) {
+
+		NamedNodeMap attrs = iNode.getAttributes();
+
+		//removeNamedItem() raises a DOMException for a name that is not in
+		//the map, so check first
+		if (attrs != null && attrs.getNamedItem(iAttr) != null)
+			attrs.removeNamedItem(iAttr);
+
+	} //removeAttribute
+
+	/**
+	 * Sets an attribute on an Element node, using an attribute node created
+	 * by the document held in mTree.
+	 *
+	 * @param iNode
+	 *            the node
+	 * @param iName
+	 *            the name of the attribute
+	 * @param iValue
+	 *            the value of the attribute
+	 */
+	private void addAttribute(Node iNode, String iName, String iValue) {
+
+		Attr created = mTree.createAttribute(iName);
+		created.setValue(iValue);
+
+		NamedNodeMap target = iNode.getAttributes();
+		target.setNamedItem(created);
+
+	} //addAttribute
+
+	/**
+	 * Checks to see if an attribute exists in an Element node
+	 *
+	 * @param iNode
+	 *            the node
+	 * @param iAttr
+	 *            the name of the attribute to check for
+	 * @return true if the attribute exists, false if it doesn't
+	 */
+	private boolean hasAttribute(Node iNode, String iAttr) {
+
+		return iNode.getAttributes().getNamedItem(iAttr) != null;
+
+	} //hasAttribute
+
+	/**
+	 * Empties a table cell if its link-to-text ratio is above the threshold.
+	 *
+	 * The ratio is the number of links (of the kind selected by the
+	 * ignoreLCImageLinks / ignoreLCTextLinks settings) divided by the number
+	 * of words; a cell without any words is always over the threshold. When
+	 * the ratio exceeds linkTextRatio, either only the links and text are
+	 * removed (ignoreLCOnlyLinksAndText) or every child of the cell is
+	 * removed, in which case mCheckChildren is cleared.
+	 *
+	 * @param iNode
+	 *            the table cell node
+	 */
+	public void testRemoveCell(Node iNode) {
+
+		//Ignore if the cell has no children
+		if (!iNode.hasChildNodes())
+			return;
+
+		//Which kind of link counts; both or neither setting means all
+		int type = ALL;
+
+		if (ignoreLCImageLinks && !ignoreLCTextLinks)
+			type = IMAGE;
+
+		else if (ignoreLCTextLinks && !ignoreLCImageLinks)
+			type = TEXT;
+
+		//Count up links and words
+		double links = getNumLinks(iNode, type);
+
+		double words = getNumWords(iNode);
+
+		//Compute the ratio and check for divide by 0
+		double ratio;
+
+		if (words == 0)
+			ratio = linkTextRatio + 1;
+
+		else
+			ratio = links / words;
+
+		if (!(ratio > linkTextRatio))
+			return;
+
+		Node next = iNode.getFirstChild();
+
+		if (ignoreLCOnlyLinksAndText) {
+
+			//Strip just the links and text, leave other content in place
+			while (next != null) {
+
+				Node current = next;
+
+				next = current.getNextSibling();
+
+				removeLinksAndText(current, type);
+
+			} //while
+
+		} else {
+
+			//Remove every child. The successor is read before each removal
+			//because removeAll() detaches the node.
+			while (next != null) {
+
+				Node current = next;
+
+				next = current.getNextSibling();
+
+				removeAll(current);
+
+			} //while
+
+			//Don't check the children because they are all removed
+			mCheckChildren = false;
+
+		} //else
+
+	} //testRemoveCell
+
+	/**
+	 * Removes a node and everything below it from the tree.
+	 *
+	 * A text link is handed to enqueueLink() when addLinksToBottom is set;
+	 * for any other node the children are removed recursively first. In both
+	 * cases the node itself is then detached from its parent.
+	 *
+	 * @param iNode
+	 *            the node to remove
+	 */
+	private void removeAll(Node iNode) {
+
+		boolean keepForBottom = isTextLink(iNode) && addLinksToBottom;
+
+		if (keepForBottom) {
+
+			enqueueLink(iNode);
+
+		} else {
+
+			Node child = iNode.getFirstChild();
+
+			while (child != null) {
+
+				Node following = child.getNextSibling();
+
+				removeAll(child);
+
+				child = following;
+
+			} //while
+
+		} //else
+
+		iNode.getParentNode().removeChild(iNode);
+
+	} //removeAll
+
+	/**
+	 * Recursive function that removes links and text nodes.
+	 *
+	 * Links and text nodes are removed according to iType: ALL removes every
+	 * one of them, IMAGE only image links, TEXT everything except image
+	 * links. Any other node is left in place and searched recursively.
+	 *
+	 * A removed text link is passed to enqueueLink() when addLinksToBottom
+	 * is set. Links that stay in the page are not enqueued.
+	 *
+	 * @param iNode
+	 *            the node to edit
+	 * @param iType
+	 *            the type of links to remove
+	 */
+	private void removeLinksAndText(Node iNode, int iType) {
+
+		if (isLink(iNode) || iNode.getNodeType() == Node.TEXT_NODE) {
+
+			boolean imageLink = isImageLink(iNode);
+
+			boolean remove =
+				iType == ALL
+					|| (iType == IMAGE && imageLink)
+					|| (iType == TEXT && !imageLink);
+
+			if (remove) {
+
+				iNode.getParentNode().removeChild(iNode);
+
+				//Only links that were really taken out are listed as removed
+				if (addLinksToBottom && isTextLink(iNode))
+					enqueueLink(iNode);
+
+			} //if
+
+		} else {
+
+			Node next = iNode.getFirstChild();
+
+			while (next != null) {
+
+				Node current = next;
+
+				next = current.getNextSibling();
+
+				removeLinksAndText(current, iType);
+
+			} //while
+
+		} //else
+
+	} //removeLinksAndText
+
+	/**
+	 * Determines if a domain is an ad domain, i.e. has an entry in
+	 * mAdServers.
+	 *
+	 * @param iDomain
+	 *            the domain to check
+	 * @return true if the domain is an ad domain, false if it is not.
+	 */
+	private boolean isAdDomain(String iDomain) {
+
+		return mAdServers.get(iDomain) != null;
+
+	} //isAdDomain
+
+	/**
+	 * Counts the links of a given kind in a subtree, the starting node
+	 * included.
+	 *
+	 * @param iNode
+	 *            the node to start counting from
+	 * @param iType
+	 *            the type of links to count: ALL, TEXT or IMAGE. Any other
+	 *            value counts nothing.
+	 * @return the number of links
+	 */
+	private double getNumLinks(Node iNode, int iType) {
+
+		double total = 0;
+
+		//Links found below this node
+		Node child = iNode.getFirstChild();
+
+		while (child != null) {
+
+			Node following = child.getNextSibling();
+
+			total += getNumLinks(child, iType);
+
+			child = following;
+
+		} //while
+
+		//The node itself, if it is a link of the requested kind
+		boolean matches;
+
+		if (iType == ALL)
+			matches = isLink(iNode);
+
+		else if (iType == TEXT)
+			matches = isTextLink(iNode);
+
+		else if (iType == IMAGE)
+			matches = isImageLink(iNode);
+
+		else
+			matches = false;
+
+		if (matches)
+			total++;
+
+		return total;
+
+	} //getNumLinks
+
+	/**
+	 * Checks to see if a node is a link, that is an <A> element carrying an
+	 * href attribute. Tag and attribute names are compared without regard to
+	 * case.
+	 *
+	 * @param iNode
+	 *            the node to check
+	 * @return true if the node is a link, false if it is not
+	 */
+	private boolean isLink(Node iNode) {
+
+		if (iNode.getNodeType() != Node.ELEMENT_NODE)
+			return false;
+
+		if (!iNode.getNodeName().equalsIgnoreCase("A"))
+			return false;
+
+		//An anchor only counts as a link when it has an href
+		NamedNodeMap attributes = iNode.getAttributes();
+
+		for (int index = 0; index < attributes.getLength(); index++) {
+
+			String attrName = attributes.item(index).getNodeName();
+
+			if (attrName.equalsIgnoreCase("HREF"))
+				return true;
+
+		} //for
+
+		return false;
+
+	} //isLink
+
+	/**
+	 * Checks to see if a node is part of an image link. That is the case for
+	 * a link that has an image among its direct children, and for an image
+	 * whose parent is a link.
+	 *
+	 * @param iNode
+	 *            the node to check
+	 * @return true if the node is a link with an image or an image inside a
+	 *         link, false if it is not
+	 */
+	private boolean isImageLink(Node iNode) {
+
+		if (isLink(iNode)) {
+
+			//A link: look for an image directly below it
+			for (Node child = iNode.getFirstChild();
+				child != null;
+				child = child.getNextSibling()) {
+
+				if (isImage(child))
+					return true;
+
+			} //for
+
+			return false;
+
+		} //if
+
+		//An image: check if its parent is a link
+		if (isImage(iNode))
+			return isLink(iNode.getParentNode());
+
+		return false;
+
+	} //isImageLink
+
+	/**
+	 * Checks to see if a node is an image, that is an element named IMG
+	 * (compared without regard to case).
+	 *
+	 * @param iNode
+	 *            the node to check
+	 * @return true if the node is an image, false if it is not
+	 */
+	private boolean isImage(Node iNode) {
+
+		return iNode.getNodeType() == Node.ELEMENT_NODE
+			&& iNode.getNodeName().equalsIgnoreCase("IMG");
+
+	} //isImage
+
+	/**
+	 * Determines if a node is a text link: a link that is not an image link.
+	 *
+	 * @param iNode
+	 *            the node to analyze
+	 * @return true if the node is a text link and false if it is not.
+	 */
+	private boolean isTextLink(Node iNode) {
+
+		if (isImageLink(iNode))
+			return false;
+
+		return isLink(iNode);
+
+	} //isTextLink
+
+	/**
+	 * Estimates the number of words in a subtree.
+	 *
+	 * Each text node contributes its character count divided by
+	 * LETTERS_PER_WORD. Children that are links are not descended into, so
+	 * link text is left out of the estimate.
+	 *
+	 * @param iNode
+	 *            the node to start counting from
+	 * @return the estimated number of words
+	 */
+	private double getNumWords(Node iNode) {
+
+		double total = 0;
+
+		Node child = iNode.getFirstChild();
+
+		while (child != null) {
+
+			Node following = child.getNextSibling();
+
+			//If it is a link, don't go any deeper into it
+			if (!isLink(child))
+				total += getNumWords(child);
+
+			child = following;
+
+		} //while
+
+		//The node's own text, if it is a text node
+		if (iNode.getNodeType() == Node.TEXT_NODE) {
+
+			String content = iNode.getNodeValue();
+
+			total += ((double) content.length()) / LETTERS_PER_WORD;
+
+		} //if
+
+		return total;
+
+	} //getNumWords
+
+	/**
+	 * Prepares a text link to be added to the bottom of the page by
+	 * appending its href to mLinksSource and its text to mLinksText.
+	 *
+	 * The text is the value of the link's first child. Nothing is enqueued
+	 * when the node is not a text link, when it has no href attribute or no
+	 * children, or when either value is null.
+	 *
+	 * @param iLinkNode
+	 *            the link node to add to the bottom of the page
+	 */
+	private void enqueueLink(Node iLinkNode) {
+		//Make sure the node is a link
+		if (!isTextLink(iLinkNode))
+			return;
+
+		//An empty anchor (<A href=...></A>) has no first child, and the
+		//lookup by exact name can miss; both used to end in an exception
+		Node href = iLinkNode.getAttributes().getNamedItem("href");
+		Node firstChild = iLinkNode.getFirstChild();
+
+		if (href == null || firstChild == null)
+			return;
+
+		String source = href.getNodeValue();
+		String text = firstChild.getNodeValue();
+
+		if (source != null && text != null) {
+			mLinksSource.add(source);
+			mLinksText.add(text);
+		}
+	} //enqueueLink
+
+	/**
+	 * Records the presence of a link by appending its href to mLinksSource
+	 * and the value of its first child to mLinksText.
+	 *
+	 * The recorded text is null when the link has no children or when its
+	 * first child has no node value. Nothing is recorded when the node is
+	 * not a link or has no href value.
+	 *
+	 * NOTE(review): this appends to the same lists that enqueueLink() and
+	 * addEnqueuedLinks() use, while getLinkSources() and getLinkText()
+	 * return mLinksSourceAll and mLinksTextAll - confirm which lists are
+	 * intended here.
+	 *
+	 * @param iLinkNode
+	 *            the link node
+	 */
+	private void recordLink(Node iLinkNode) {
+		//Make sure the node is a link
+		if (!isLink(iLinkNode))
+			return;
+
+		Node href = iLinkNode.getAttributes().getNamedItem("href");
+
+		if (href == null)
+			return;
+
+		String source = href.getNodeValue();
+
+		//An empty anchor has no first child; this method runs for every
+		//link, so dereferencing it blindly aborted the whole extraction
+		Node firstChild = iLinkNode.getFirstChild();
+		String text = (firstChild == null) ? null : firstChild.getNodeValue();
+
+		if (source != null) {
+			mLinksSource.add(source);
+			mLinksText.add(text);
+		}
+	} //recordLink
+
+	/**
+	 * Records the presence of an image by appending its src value to
+	 * mImagesSource. Nodes that are not images, and images without a src
+	 * attribute, are ignored.
+	 *
+	 * @param iImageNode
+	 *            the image node
+	 */
+	private void recordImage(Node iImageNode) {
+		//Make sure the node is an image
+		if (!isImage(iImageNode))
+			return;
+
+		//An <IMG> without src used to end in a NullPointerException here
+		Node src = iImageNode.getAttributes().getNamedItem("src");
+
+		if (src == null)
+			return;
+
+		String source = src.getNodeValue();
+
+		if (source != null) {
+			mImagesSource.add(source);
+		}
+	} //recordImage
+
+	/**
+	 * Returns the list of image sources, i.e. the list that recordImage()
+	 * appends each image's src value to.
+	 *
+	 * The internal list itself is returned, not a copy.
+	 *
+	 * @return a linked list containing all the image sources
+	 */
+	public LinkedList getImageSources() {
+		return mImagesSource;
+	}
+
+	/**
+	 * Returns the list held in mLinksSourceAll. The internal list itself is
+	 * returned, not a copy.
+	 *
+	 * NOTE(review): recordLink() appends to mLinksSource rather than to
+	 * mLinksSourceAll - confirm where this list is filled.
+	 *
+	 * @return a linked list containing all the link sources
+	 */
+	public LinkedList getLinkSources() {
+		return mLinksSourceAll;
+	}
+
+	/**
+	 * Returns the list held in mLinksTextAll. The internal list itself is
+	 * returned, not a copy.
+	 *
+	 * NOTE(review): recordLink() appends to mLinksText rather than to
+	 * mLinksTextAll - confirm where this list is filled.
+	 *
+	 * @return a linked list containing all the link text
+	 */
+	public LinkedList getLinkText() {
+		return mLinksTextAll;
+	}
+
+	/**
+	 * Appends the enqueued links to the bottom of the page.
+	 *
+	 * A full-width, single-cell table is added at the end of the body. It
+	 * holds a centered "Removed Links:" heading followed by one link per
+	 * enqueued source/text pair, each on its own line. Afterwards both
+	 * queues are replaced by empty lists. Nothing happens when no body node
+	 * was seen or when either queue is empty.
+	 */
+	private void addEnqueuedLinks() {
+
+		//Needs a body to append to and something to append
+		if (mBodyNode == null
+			|| mLinksSource.size() == 0
+			|| mLinksText.size() == 0) {
+			return;
+		}
+
+		//The surrounding table
+		Element table = mTree.createElement("TABLE");
+		table.setAttribute("cellpadding", "5");
+		table.setAttribute("width", "100%");
+
+		Element row = mTree.createElement("TR");
+
+		Element cell = mTree.createElement("TD");
+		cell.setAttribute("bgcolor", "white");
+
+		//The heading
+		Element heading = mTree.createElement("H3");
+		heading.appendChild(mTree.createTextNode("Removed Links:"));
+
+		Element center = mTree.createElement("CENTER");
+		center.appendChild(heading);
+
+		//Assemble from the inside out and hook the result into the body
+		cell.appendChild(center);
+		row.appendChild(cell);
+		table.appendChild(row);
+		mBodyNode.appendChild(table);
+
+		//One link per line, pairing sources and texts in queue order
+		Iterator sources = mLinksSource.iterator();
+		Iterator texts = mLinksText.iterator();
+
+		while (sources.hasNext() && texts.hasNext()) {
+
+			String href = (String) sources.next();
+			String label = (String) texts.next();
+
+			Element anchor = mTree.createElement("A");
+			anchor.setAttribute("href", href);
+			anchor.setAttribute("style", "color: blue");
+			anchor.appendChild(mTree.createTextNode(label));
+
+			cell.appendChild(anchor);
+			cell.appendChild(mTree.createElement("BR"));
+
+		} //while
+
+		//Purge the enqueued Links
+		mLinksSource = new LinkedList();
+		mLinksText = new LinkedList();
+
+	} //addEnqueuedLinks
+
+	/**
+	 * Returns the Document held in mTree.
+	 *
+	 * The tree itself is returned, not a copy.
+	 *
+	 * @return the Document object of the DOM tree representing the HTML
+	 *         file
+	 */
+
+	public Document getTree() {
+
+		return mTree;
+
+	}
+
+	/**
+	 * Prints only the text of the DOM tree in mTree, without any of the tags.
+	 * The PrintWriter wrapping the stream is closed when printing finishes.
+	 *
+	 * @param iOutputStream
+	 *            the output stream; closed together with the PrintWriter
+	 *
+	 */
+
+	public void textPrint(OutputStream iOutputStream) {
+
+		PrintWriter output = new PrintWriter(iOutputStream);
+
+		textPrint(mTree, output);
+		//NOTE(review): text buffered after the last <BR> is not flushed here - confirm
+		output.close();
+
+	} //textPrint
+
+	/**
+	 * Walks the subtree, children first (skipping STYLE and SCRIPT). Non-blank
+	 * text is collected in textPrintBuffer; each BR element triggers flush().
+	 *
+	 * @param iDOMTree
+	 *            the node whose subtree is printed without any tags
+	 *
+	 * @param iWriter
+	 *            the PrintWriter that flush() writes the buffered lines to
+	 *
+	 */
+
+	private void textPrint(Node iDOMTree, PrintWriter iWriter) {
+
+		//Print child nodes first
+
+		if (iDOMTree.hasChildNodes()) {
+
+			Node next = iDOMTree.getFirstChild();
+
+			while (next != null) {
+
+				Node current = next;
+				//Remember the next sibling before recursing into the current node
+				next = current.getNextSibling();
+
+				//=====Filter out what is not really text=====//
+
+				String name = current.getNodeName();
+
+				boolean valid = true;
+
+				//Styles should not be treated as text
+
+				if (name.equalsIgnoreCase("STYLE"))
+					valid = false;
+
+				//Scripts should not be treated as text either
+
+				else if (name.equalsIgnoreCase("SCRIPT"))
+					valid = false;
+
+				//============================================//
+
+				//Recurse into the child unless it was filtered out above
+
+				if (valid)
+					textPrint(current, iWriter);
+
+			} //while
+
+		} //if
+
+		//Check to see if the node is a Text node or an element node
+
+		int type = iDOMTree.getNodeType();
+
+		//Element node
+
+		if (type == Node.ELEMENT_NODE) {
+
+			//if the node is <BR>, then flush the buffered line
+
+			if (iDOMTree.getNodeName().equalsIgnoreCase("BR")) {
+
+				flush(iWriter);
+
+			}
+
+		} //if
+
+		//Text node
+
+		else if (type == Node.TEXT_NODE) {
+
+			//Buffer non-blank text; it is written out by the next flush()
+
+			if (!(iDOMTree.getNodeValue().trim().equals(""))) {
+
+				textPrintBuffer += iDOMTree.getNodeValue();
+
+			}
+
+		} //else if
+
+	} //textPrint
+
+	/**
+	 *
+	 * Flushes the buffered line (textPrintBuffer) and prints it out depending
+	 * on the number of consecutive blank lines. This method also keeps track
+	 * of the number of consecutive blank lines: with limitLinebreaks set, a
+	 * blank line is printed only while numberBlankLines is below
+	 * maxLinebreaks. The buffer is emptied in every case.
+	 *
+	 * @param iWriter
+	 *            the PrintWriter to flush the buffer to
+	 *
+	 */
+
+	private void flush(PrintWriter iWriter) {
+
+		boolean blank = false;
+
+		//Check to see if the buffered line is blank
+
+		if (textPrintBuffer.trim().length() == 0)
+			blank = true;
+
+		//Make sure there are not too many consecutive blank lines if
+		// necessary
+
+		if (limitLinebreaks) {
+
+			if (blank && numberBlankLines < maxLinebreaks) {
+
+				iWriter.println(textPrintBuffer);
+
+				numberBlankLines++;
+
+			} //if
+
+			else if (!blank)
+				iWriter.println(textPrintBuffer);
+
+		} //if
+
+		else
+			iWriter.println(textPrintBuffer);
+
+		//Reset the numberBlankLines if the line is not blank
+
+		if (!blank)
+			numberBlankLines = 0;
+
+		textPrintBuffer = "";
+
+	} //flush
+
+	/**
+	 * Pretty prints the HTML to an OutputStream using the ISO-8859-1 encoding.
+	 * Exceptions are printed, not rethrown; iOut is not explicitly closed here.
+	 *
+	 * @param iNode
+	 *            the Document to start printing from
+	 *
+	 * @param iOut
+	 *            the output stream to print to.
+	 *
+	 */
+
+	public void prettyPrint(Document iNode, OutputStream iOut) {
+
+		//Create formatting that will indent and print with the proper
+
+		//method specified by the Document object.
+
+		OutputFormat format = null;
+
+		//according to the java documentation, all compliant JVM's should
+		// support the
+		//ISO-8859-1 encoding.
+		format = new OutputFormat(iNode, "ISO-8859-1", true);
+
+		//Get the printer
+		HTMLSerializer printer = new HTMLSerializer(iOut, format);
+
+		try {
+
+			printer.serialize(iNode);
+
+		} catch (UnsupportedEncodingException uue) {
+
+			System.out.println(
+				"Error: your system does not support the ISO-8859-1 encoding.");
+			uue.printStackTrace();
+
+		} catch (Exception e) {
+
+			e.printStackTrace();
+
+		} //catch
+
+	} //prettyPrint
+
+	public static void main(String[] args) {
+		//Needs an input file and an output file; the settings file is optional
+		if (args.length < 2) {
+
+			System.out.println(
+				"Usage: java ContentExtractor [input file] [output file] {settings file}");
+
+			return;
+
+		}
+		//Open the input file. NOTE(review): streamIn is not closed in this method
+		FileInputStream streamIn;
+
+		try {
+
+			streamIn = new FileInputStream(args[0]);
+
+		} catch (FileNotFoundException e) {
+
+			System.out.println("Input File Not Found");
+
+			return;
+
+		} catch (SecurityException e) {
+
+			System.out.println("Read access denied to Input File");
+
+			return;
+
+		}
+		//Use the settings file when one was given
+		ContentExtractor ce;
+
+		if (args.length == 2)
+			ce = new ContentExtractor(streamIn);
+
+		else
+			ce = new ContentExtractor(args[2], streamIn);
+
+		ce.extractContent();
+
+		try {
+			//NOTE(review): processNoOverwrite() re-opens args[0] and extracts again
+			File output = new File(args[1]);
+
+			output.createNewFile();
+
+			ce.processNoOverwrite(new File(args[0]), output);
+
+		} catch (IOException e) {
+
+			System.out.println("IO Exception");
+
+			e.printStackTrace();
+
+			return;
+
+		}
+
+	}
+
+	/**
+	 *
+	 * This method returns the settings panel (mSettingsGUI) for the filter
+	 *
+	 * @return the ProxyFilterSettings panel to edit the settings from.
+	 *
+	 */
+
+	public ProxyFilterSettings getSettingsGUI() {
+
+		return mSettingsGUI;
+
+	}
+
+	/**
+	 *
+	 * Returns what the content type of the file is.
+	 *
+	 * @return CONTENT_TEXT when onlyText is set, CONTENT_HTML otherwise
+	 *
+	 */
+
+	public String getContentType() {
+
+		if (onlyText)
+			return CONTENT_TEXT;
+
+		else
+			return CONTENT_HTML;
+
+	} //getContentType
+
+	/**
+	 * This method processes a File in place and returns it for the proxy to
+	 * use: the file is read, extractContent() runs, and the result is written
+	 * back over the same file (as text when onlyText is set, else as HTML).
+	 *
+	 * @param in
+	 *            the file to process and overwrite
+	 * @return the same File object that was passed in
+	 */
+
+	public File process(File in) throws IOException {
+
+		FileInputStream streamIn = new FileInputStream(in);
+
+		mIn = streamIn;
+
+		extractContent();
+
+		streamIn.close();
+
+		if (!onlyText) {
+			//NOTE(review): this FileOutputStream is not closed in this method
+			prettyPrint(mTree, new FileOutputStream(in));
+
+		} else {
+
+			textPrint(new FileOutputStream(in));
+
+		}
+
+		return in;
+
+	}
+
+	/**
+	 *
+	 * This method processes a File and writes the result to a second file for
+	 * the proxy to use. Note: the input file is not overwritten
+	 *
+	 * @param in
+	 *            the file to process
+	 *
+	 * @param out
+	 *            the output file
+	 *
+	 * @return the out File object that was passed in
+	 */
+
+	public File processNoOverwrite(File in, File out) throws IOException {
+
+		FileInputStream streamIn = new FileInputStream(in);
+
+		mIn = streamIn;
+
+		extractContent();
+
+		streamIn.close();
+
+		if (!onlyText) {
+			//NOTE(review): this FileOutputStream is not closed in this method
+			prettyPrint(mTree, new FileOutputStream(out));
+
+		} else {
+
+			textPrint(new FileOutputStream(out));
+
+		}
+
+		return out;
+
+	}
+
+} //ContentExtractor
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/HttpStream.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/HttpStream.java
index 3914bbf..2b48a19 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/HttpStream.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/HttpStream.java
@@ -1,236 +1,256 @@
+package psl.memento.pervasive.crunch;
+
 import java.io.BufferedOutputStream;
 import java.io.FileOutputStream;
 import java.io.FileInputStream;
 import java.io.File;
 import java.io.OutputStream;
-import java.io.InputStream;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.net.Socket;
 import java.util.Hashtable;
 import java.util.Enumeration;

-public class HttpStream{
-    public static final String TEMP_FILE_SUFFIX = ".temp";
-    public static final String TEMP_FILE_PREFIX = "proxy.";
-    public static final String TEMP_FILE_DIR    = "."+File.separator+"cache";
-
-    public static final long   TIME_OUT    = 5000;
-    public static final int    SLEEP_TIME  = 50;
-    public static final int    BUFFER_SIZE = 4096;
-    public static final byte[] NEWLINE     = "\r\n".getBytes();
-    public static final byte[] SEPARATOR   = ": ".getBytes();
-
-    private String firstLine;
-    private Hashtable attributes;
-    private BufferedInputStream inStream;
-    private Socket socket;
-    private boolean hasContent;
-
-    public boolean hasContent(){
-	return hasContent;
-    }
-
-    public HttpStream(Socket s) throws IOException, ReadTimeoutException{
-	socket = s;
-	inStream = new BufferedInputStream(socket.getInputStream());
-	attributes = new Hashtable();
-	readHeader();
-    }
-
-    public HttpStream(String first_line, Hashtable attribs, File data){
-	firstLine = first_line;
-	attributes = attribs;
-	try{
-	    inStream = new BufferedInputStream(new FileInputStream(data));
-	}catch(Exception e){
-	    System.out.println("Error creating stream...");
-	    e.printStackTrace();
+public class HttpStream {
+	public static final String TEMP_FILE_SUFFIX = ".temp";
+	public static final String TEMP_FILE_PREFIX = "proxy.";
+	public static final String TEMP_FILE_DIR = "." + File.separator + "cache";
+
+	public static final long TIME_OUT = 5000;
+	public static final int SLEEP_TIME = 50;
+	public static final int BUFFER_SIZE = 4096;
+	public static final byte[] NEWLINE = "\r\n".getBytes();
+	public static final byte[] SEPARATOR = ": ".getBytes();
+
+	private String firstLine;
+	private Hashtable attributes;
+	private BufferedInputStream inStream;
+	private Socket socket;
+	private boolean hasContent;
+
+	public boolean hasContent() {
+		return hasContent;
 	}
-    }
-
-    private void readHeader() throws ReadTimeoutException{
-	LineInputStream lines = new LineInputStream(inStream);
-	try{
-	    hasContent=false;
-	    firstLine = lines.readLine();
-	    System.out.println("\t"+firstLine);
-
-	    String input = lines.readLine();
-	    int index = input.indexOf(":");
-
-	    while(index>-1){
-		System.out.println("\t"+input);
-		String key = input.substring(0,index).toLowerCase().trim();
-		String value = input.substring(index+1,input.length()).trim();
-		attributes.put(key, value);
-
-		input = lines.readLine();
-
-		index = input.indexOf(":");
-	    }
-	    hasContent=true;
-	}catch(ReadTimeoutException rte){
-	    throw new ReadTimeoutException("Timeout while reading http header.");
-	}catch(Exception e){
-	    e.printStackTrace();
+
+	public HttpStream(Socket s) throws IOException, ReadTimeoutException {
+		socket = s;
+		inStream = new BufferedInputStream(socket.getInputStream());
+		attributes = new Hashtable();
+		readHeader();
 	}
-	System.out.println();
-    }
-
-    public String getAttribute(String attributeKey){
-	return (String)(attributes.get(attributeKey.toLowerCase()));
-    }
-
-    public void setAttribute(String attributeKey, String value){
-	attributes.put(attributeKey, value);
-    }
-
-    public File downloadToFile() throws IOException{
-	File tempFile = null;
-	try{
-	    tempFile = File.createTempFile(TEMP_FILE_PREFIX,
-					   TEMP_FILE_SUFFIX,
-					   new File(TEMP_FILE_DIR));
-	    sendContentToStream(new BufferedOutputStream(new FileOutputStream(tempFile)));
-	}catch(ReadTimeoutException rte){
-	    System.out.println("File Download Timed Out.");
-	}catch(IOException e){
-	    try{tempFile.delete();}
-	    catch(Exception ex){}
-	    throw e;
+
+	public HttpStream(String first_line, Hashtable attribs, File data) {
+		firstLine = first_line;
+		attributes = attribs;
+		try {
+			inStream = new BufferedInputStream(new FileInputStream(data));
+		} catch (Exception e) {
+			System.out.println("Error creating stream...");
+			e.printStackTrace();
+		}
 	}
-	return tempFile;
-    }
-
-    /**
-     * Replaces the contents of an http message with the contents of
-     * a file.
-     **/
-    public void replaceContentWithFile(File f){
-	try{
-	    //change the instream to a FileInputStream
-	    inStream = new BufferedInputStream(new FileInputStream(f));
-
-	    //fix the size of the stream
-	    attributes.remove("content-length");
-	    attributes.put("content-length", String.valueOf(f.length()));
-	    hasContent=true;
-	}catch(Exception e){
-	    e.printStackTrace();
+
+	private void readHeader() throws ReadTimeoutException {
+		LineInputStream lines = new LineInputStream(inStream);
+		try {
+			hasContent = false;
+			firstLine = lines.readLine();
+			System.out.println("\t" + firstLine);
+
+			String input = lines.readLine();
+			int index = input.indexOf(":");
+
+			while (index > -1) {
+				System.out.println("\t" + input);
+				String key = input.substring(0, index).toLowerCase().trim();
+				String value =
+					input.substring(index + 1, input.length()).trim();
+				attributes.put(key, value);
+
+				input = lines.readLine();
+
+				index = input.indexOf(":");
+			}
+			hasContent = true;
+		} catch (ReadTimeoutException rte) {
+			throw new ReadTimeoutException("Timeout while reading http header.");
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+		System.out.println();
 	}
-    }
-
-    /**
-     * Sends the header of an http message to a stream (no content)
-     **/
-    public void sendHeaderToStream(OutputStream outStream) throws IOException{
-	try{
-	    //write the first line
-	    outStream.write(firstLine.getBytes());
-	    outStream.write(NEWLINE);
-
-	    //write the rest of the header
-	    Enumeration keys = attributes.keys();
-	    while(keys.hasMoreElements()){
-		String currentKey = (String)(keys.nextElement());
-		String currentValue = (String)(attributes.get(currentKey));
-		outStream.write(currentKey.getBytes());
-		outStream.write(SEPARATOR);
-		outStream.write(currentValue.getBytes());
-		outStream.write(NEWLINE);
-	    }
-	    outStream.write(NEWLINE);
-	    outStream.flush();
-	}catch(Exception e){
-	    e.printStackTrace();
+
+	public String getAttribute(String attributeKey) {
+		return (String) (attributes.get(attributeKey.toLowerCase()));
 	}
-    }
-
-    /**
-     * Sends the contents (no header) of the http message to a stream
-     *
-     * @param outStream the stream to write the content portion of the
-     * http message to
-     **/
-    public void sendContentToStream(OutputStream outStream) throws IOException, ReadTimeoutException{
-	System.out.println("http::sendingContentToStream...");
-	long length = getContentLength();
-	if(length>-1){
-	    sendKnownContentToStream(outStream, length);
-	}else{
-	    sendUnknownContentToStream(outStream);
+
+	public void setAttribute(String attributeKey, String value) {
+		attributes.put(attributeKey, value);
 	}
-	System.out.println("http::done");
-    }
-
-    private void sendKnownContentToStream(OutputStream outStream, long length) throws IOException, ReadTimeoutException{
-	byte[] dataBuffer = new byte[BUFFER_SIZE];
-	long lastWriteTime = System.currentTimeMillis();
-	while(length>0){
-	    //find the available bytes
-	    int available = inStream.available();
-	    if(available>BUFFER_SIZE)available=BUFFER_SIZE;
-
-	    //transfer the bytes
-	    if(available>0){
-		available = inStream.read(dataBuffer, 0, available);
-		outStream.write(dataBuffer, 0, available);
-		length -= (long)available;
-		lastWriteTime = System.currentTimeMillis();
-	    }
-	    //wait for more bytes, and time out if necessary
-	    else{
-		if(System.currentTimeMillis()>lastWriteTime+TIME_OUT){
-		    try{outStream.flush();}
-		    catch(Exception e){}
-		    throw new ReadTimeoutException("Transfer Timed Out.");
+
+	public File downloadToFile() throws IOException {
+		File tempFile = null;
+		try {
+			tempFile =
+				File.createTempFile(
+					TEMP_FILE_PREFIX,
+					TEMP_FILE_SUFFIX,
+					new File(TEMP_FILE_DIR));
+			sendContentToStream(
+				new BufferedOutputStream(new FileOutputStream(tempFile)));
+		} catch (ReadTimeoutException rte) {
+			System.out.println("File Download Timed Out.");
+		} catch (IOException e) {
+			try {
+				tempFile.delete();
+			} catch (Exception ex) {
+			}
+			throw e;
 		}
-		try{Thread.sleep(SLEEP_TIME);}
-		catch(InterruptedException ie){}
-	    }
+		return tempFile;
 	}
-    }
-
-    /**
-     * Unknown content always comes from sockets because
-     * we know the size of file streams
-     **/
-    private void sendUnknownContentToStream(OutputStream outStream) throws IOException, ReadTimeoutException{
-	byte[] dataBuffer = new byte[BUFFER_SIZE];
-	long lastWriteTime = System.currentTimeMillis();
-	while(!socket.isInputShutdown()){
-	    //find available bytes
-	    int available = inStream.available();
-	    if(available>BUFFER_SIZE)available=BUFFER_SIZE;
-
-	    //transfer the bytes
-	    if(available>0){
-		available = inStream.read(dataBuffer, 0, available);
-		outStream.write(dataBuffer, 0, available);
-		lastWriteTime = System.currentTimeMillis();
-	    }
-	    //wait for more bytes, and time out if necessary
-	    else{
-		if(System.currentTimeMillis()>lastWriteTime+TIME_OUT){
-		    try{outStream.flush();}
-		    catch(Exception e){}
-		    throw new ReadTimeoutException("Transfer Timed Out.");
+
+	/**
+	 * Replaces the contents of an http message with the contents of a file.
+	 */
+	public void replaceContentWithFile(File f) {
+		try {
+			//change the instream to a FileInputStream
+			inStream = new BufferedInputStream(new FileInputStream(f));
+
+			//fix the size of the stream
+			attributes.remove("content-length");
+			attributes.put("content-length", String.valueOf(f.length()));
+			hasContent = true;
+		} catch (Exception e) {
+			e.printStackTrace();
 		}
-		try{Thread.sleep(SLEEP_TIME);}
-		catch(InterruptedException ie){}
-	    }
 	}
-    }
-
-    /**
-     * returns the content length or -1 if there was an error
-     **/
-    private long getContentLength(){
-	try{
-	    return Long.parseLong((String)attributes.get("content-length"));
-	}catch(Exception e){
-	    return -1;
+
+	/**
+	 * Sends the header of an http message to a stream (no content)
+	 */
+	public void sendHeaderToStream(OutputStream outStream) throws IOException {
+		try {
+			//write the first line
+			outStream.write(firstLine.getBytes());
+			outStream.write(NEWLINE);
+
+			//write the rest of the header
+			Enumeration keys = attributes.keys();
+			while (keys.hasMoreElements()) {
+				String currentKey = (String) (keys.nextElement());
+				String currentValue = (String) (attributes.get(currentKey));
+				outStream.write(currentKey.getBytes());
+				outStream.write(SEPARATOR);
+				outStream.write(currentValue.getBytes());
+				outStream.write(NEWLINE);
+			}
+			outStream.write(NEWLINE);
+			outStream.flush();
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Sends the contents (no header) of the http message to a stream
+	 *
+	 * @param outStream
+	 *            the stream to write the content portion of the http message
+	 *            to
+	 */
+	public void sendContentToStream(OutputStream outStream)
+		throws IOException, ReadTimeoutException {
+		System.out.println("http::sendingContentToStream...");
+		long length = getContentLength();
+		if (length > -1) {
+			sendKnownContentToStream(outStream, length);
+		} else {
+			sendUnknownContentToStream(outStream);
+		}
+		System.out.println("http::done");
+	}
+
+	private void sendKnownContentToStream(OutputStream outStream, long length)
+		throws IOException, ReadTimeoutException {
+		byte[] dataBuffer = new byte[BUFFER_SIZE];
+		long lastWriteTime = System.currentTimeMillis();
+		while (length > 0) {
+			//find the available bytes
+			int available = inStream.available();
+			if (available > BUFFER_SIZE)
+				available = BUFFER_SIZE;
+
+			//transfer the bytes
+			if (available > 0) {
+				available = inStream.read(dataBuffer, 0, available);
+				outStream.write(dataBuffer, 0, available);
+				length -= (long) available;
+				lastWriteTime = System.currentTimeMillis();
+			}
+			//wait for more bytes, and time out if necessary
+			else {
+				if (System.currentTimeMillis() > lastWriteTime + TIME_OUT) {
+					try {
+						outStream.flush();
+					} catch (Exception e) {
+					}
+					throw new ReadTimeoutException("Transfer Timed Out.");
+				}
+				try {
+					Thread.sleep(SLEEP_TIME);
+				} catch (InterruptedException ie) {
+				}
+			}
+		}
+	}
+
+	/**
+	 * Unknown content always comes from sockets because we know the size of
+	 * file streams
+	 */
+	private void sendUnknownContentToStream(OutputStream outStream)
+		throws IOException, ReadTimeoutException {
+		byte[] dataBuffer = new byte[BUFFER_SIZE];
+		long lastWriteTime = System.currentTimeMillis();
+		while (!socket.isInputShutdown()) {
+			//find available bytes
+			int available = inStream.available();
+			if (available > BUFFER_SIZE)
+				available = BUFFER_SIZE;
+
+			//transfer the bytes
+			if (available > 0) {
+				available = inStream.read(dataBuffer, 0, available);
+				outStream.write(dataBuffer, 0, available);
+				lastWriteTime = System.currentTimeMillis();
+			}
+			//wait for more bytes, and time out if necessary
+			else {
+				if (System.currentTimeMillis() > lastWriteTime + TIME_OUT) {
+					try {
+						outStream.flush();
+					} catch (Exception e) {
+					}
+					throw new ReadTimeoutException("Transfer Timed Out.");
+				}
+				try {
+					Thread.sleep(SLEEP_TIME);
+				} catch (InterruptedException ie) {
+				}
+			}
+		}
+	}
+
+	/**
+	 * returns the content length or -1 if there was an error
+	 */
+	private long getContentLength() {
+		try {
+			return Long.parseLong((String) attributes.get("content-length"));
+		} catch (Exception e) {
+			return -1;
+		}
 	}
-    }
 }
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/LineInputStream.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/LineInputStream.java
index 7519ccd..8284c99 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/LineInputStream.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/LineInputStream.java
@@ -1,99 +1,101 @@
+package psl.memento.pervasive.crunch;
+
 import java.io.InputStream;
 import java.io.IOException;
 /**
- * An extension of an inputstream to add readline functionality for
- * the purpose of reading http header lines.
+ * An extension of an inputstream to add readline functionality for the purpose
+ * of reading http header lines.
  *
  * @author Peter Grimm (pmg23@cs.columbia.edu)
- **/
-public class LineInputStream extends InputStream{
-    public static final long READ_TIMEOUT =  5000;
-    public static final int  SLEEP_TIME   =   100;
-    public static final int  BUFFER_SIZE  = 16384;
-    private byte[] lineBuffer = new byte[BUFFER_SIZE];
-    private InputStream inputStream;
-    private int lineLength;
-
-    /**
-     * Creates a new LineInputStream that gets its data from
-     * the given InputStream.
-     *
-     * @param is where to read lines from
-     **/
-    public LineInputStream(InputStream is){
-	inputStream = is;
-    }
-
-    /**
-     * Reads one line.  Expects lines from the input stream to be
-     * terminated by a CRLF.  Lines are returned without the trailing
-     * CRLF.  Will throw an IOException for lines that exceed
-     * BUFFER_SIZE, if a timeout occurs while waiting for data, or
-     * if there are problems reading the data.
-     *
-     * @return one line
-     **/
-    public String readLine() throws IOException, ReadTimeoutException{
-	try{
-	    waitForAvailable();
-	    lineBuffer[0]=(byte)inputStream.read();
-	    waitForAvailable();
-	    lineBuffer[1]=(byte)inputStream.read();
-	    lineLength=2;
-	    while(lineBuffer[lineLength-2]!=(byte)13 ||
-		  lineBuffer[lineLength-1]!=(byte)10){
-		waitForAvailable();
-		lineBuffer[lineLength]=(byte)inputStream.read();
-		lineLength++;
-	    }
-	    return new String(lineBuffer, 0, lineLength-2);
-	}catch(ArrayIndexOutOfBoundsException aioobe){
-	    throw new IOException("Line too long.");
+ */
+public class LineInputStream extends InputStream {
+	public static final long READ_TIMEOUT = 5000;
+	public static final int SLEEP_TIME = 100;
+	public static final int BUFFER_SIZE = 16384;
+	private byte[] lineBuffer = new byte[BUFFER_SIZE];
+	private InputStream inputStream;
+	private int lineLength;
+
+	/**
+	 * Creates a new LineInputStream that gets its data from the given
+	 * InputStream.
+	 *
+	 * @param is
+	 *            where to read lines from
+	 */
+	public LineInputStream(InputStream is) {
+		inputStream = is;
 	}
-    }
-
-    /**
-     * Waits for the input stream to become available.  If this does
-     * not occur after a while, it times out and throws an IOException
-     **/
-    private void waitForAvailable() throws IOException, ReadTimeoutException{
-	long expireTime = System.currentTimeMillis()+READ_TIMEOUT;
-	while(inputStream.available()<1){
-	    if(System.currentTimeMillis()>expireTime){
-		throw new ReadTimeoutException("Read Timed Out.");
-	    }
-	    try{
-		Thread.sleep(SLEEP_TIME);
-	    }catch(InterruptedException ie){
-	    }
+
+	/**
+	 * Reads one line. Expects lines from the input stream to be terminated by
+	 * a CRLF. Lines are returned without the trailing CRLF. Will throw an
+	 * IOException for lines that exceed BUFFER_SIZE or if there are problems
+	 * reading the data, and a ReadTimeoutException if waiting for data times out.
+	 *
+	 * @return one line
+	 */
+	public String readLine() throws IOException, ReadTimeoutException {
+		try {
+			waitForAvailable();
+			lineBuffer[0] = (byte) inputStream.read();
+			waitForAvailable();
+			lineBuffer[1] = (byte) inputStream.read();
+			lineLength = 2;
+			while (lineBuffer[lineLength - 2] != (byte) 13
+				|| lineBuffer[lineLength - 1] != (byte) 10) {
+				waitForAvailable();
+				lineBuffer[lineLength] = (byte) inputStream.read();
+				lineLength++;
+			}
+			return new String(lineBuffer, 0, lineLength - 2);
+		} catch (ArrayIndexOutOfBoundsException aioobe) {
+			throw new IOException("Line too long.");
+		}
+	}
+
+	/**
+	 * Waits for the input stream to become available. If this does not occur
+	 * within READ_TIMEOUT milliseconds, it throws a ReadTimeoutException.
+	 */
+	private void waitForAvailable() throws IOException, ReadTimeoutException {
+		long expireTime = System.currentTimeMillis() + READ_TIMEOUT;
+		while (inputStream.available() < 1) {
+			if (System.currentTimeMillis() > expireTime) {
+				throw new ReadTimeoutException("Read Timed Out.");
+			}
+			try {
+				Thread.sleep(SLEEP_TIME);
+			} catch (InterruptedException ie) {
+			}
+		}
+	}
+
+	public int available() throws IOException {
+		return inputStream.available();
+	}
+	public void close() throws IOException {
+		inputStream.close();
+	}
+	public void mark(int readlimit) {
+		inputStream.mark(readlimit);
+	}
+	public boolean markSupported() {
+		return inputStream.markSupported();
+	}
+	public int read() throws IOException {
+		return inputStream.read();
+	}
+	public int read(byte[] b) throws IOException {
+		return inputStream.read(b);
+	}
+	public int read(byte[] b, int off, int len) throws IOException {
+		return inputStream.read(b, off, len);
+	}
+	public void reset() throws IOException {
+		inputStream.reset();
+	}
+	public long skip(long n) throws IOException {
+		return inputStream.skip(n);
 	}
-    }
-
-    public int available() throws IOException{
-	return inputStream.available();
-    }
-    public void close() throws IOException{
-	inputStream.close();
-    }
-    public void mark(int readlimit){
-	inputStream.mark(readlimit);
-    }
-    public boolean markSupported(){
-	return inputStream.markSupported();
-    }
-    public int read() throws IOException{
-	return inputStream.read();
-    }
-    public int read(byte[] b) throws IOException{
-	return inputStream.read(b);
-    }
-    public int read(byte[] b, int off, int len) throws IOException{
-	return inputStream.read(b, off, len);
-    }
-    public void reset() throws IOException{
-	inputStream.reset();
-    }
-    public long skip(long n) throws IOException{
-	return inputStream.skip(n);
-    }
 }
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/Proxy.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/Proxy.java
index 294a923..25efe82 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/Proxy.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/Proxy.java
@@ -1,82 +1,84 @@
+package psl.memento.pervasive.crunch;
+
 import java.util.LinkedList;
 import java.util.Iterator;

 import javax.swing.*;
-import javax.swing.event.*;
+
 import java.awt.*;
 import java.awt.event.*;

 /**
  * @author Peter Grimm
- **/
-public class Proxy extends Thread implements ActionListener{
-
-    private LinkedList filters = new LinkedList();
-
-    private JFrame proxyWindow;
-    private JTabbedPane settingsTabs;
-    private JPanel buttonPanel;
-    private JButton cancelButton;
-    private JButton commitButton;
-
-    public static void main(String[] args){
-	new Proxy().start();
-    }
-
-    public void run(){
-	System.out.println("Started...");
-
-	filters.add(new ContentExtractor());
-
-	drawGUI();
-
-	new ProxyListener(filters, 4000).start();
-    }
-
-    public void drawGUI(){
-        proxyWindow = new JFrame("Not Totally Hacked Proxy 0.0a");
-	proxyWindow.setDefaultCloseOperation(proxyWindow.EXIT_ON_CLOSE);
-
-	settingsTabs = new JTabbedPane();
-	Iterator filterIter = filters.listIterator(0);
-	while(filterIter.hasNext()){
-	    ProxyFilter current = (ProxyFilter)(filterIter.next());
-	    settingsTabs.add(current.getSettingsGUI().getTabName(), new JScrollPane(current.getSettingsGUI()));
+ */
+public class Proxy extends Thread implements ActionListener {
+
+	private LinkedList filters = new LinkedList();
+
+	private JFrame proxyWindow;
+	private JTabbedPane settingsTabs;
+	private JPanel buttonPanel;
+	private JButton cancelButton;
+	private JButton commitButton;
+
+	public static void main(String[] args) {
+		new Proxy().start();
 	}
-
-	proxyWindow.getContentPane().add(settingsTabs, BorderLayout.CENTER);
-
-	buttonPanel = new JPanel();
-	buttonPanel.setLayout(new BoxLayout(buttonPanel, BoxLayout.X_AXIS));
-	buttonPanel.add(Box.createHorizontalGlue());
-	commitButton = new JButton("Commit Changes");
-	commitButton.addActionListener(this);
-	buttonPanel.add(commitButton);
-	cancelButton = new JButton("Cancel Changes");
-	cancelButton.addActionListener(this);
-	buttonPanel.add(cancelButton);
-
-	proxyWindow.getContentPane().add(buttonPanel, BorderLayout.SOUTH);
-
-	proxyWindow.pack();
-	proxyWindow.show();
-    }
-
-    public void actionPerformed(ActionEvent e){
-	Object command = e.getSource();
-	if(command==commitButton){
-	    int index = settingsTabs.getSelectedIndex();
-	    ProxyFilter filter = (ProxyFilter)(filters.get(index));
-	    filter.getSettingsGUI().commitSettings();
-	    System.out.println("Settings committed.");
-	}else if(command==cancelButton){
-	    int index = settingsTabs.getSelectedIndex();
-	    ProxyFilter filter = (ProxyFilter)(filters.get(index));
-	    filter.getSettingsGUI().revertSettings();
-	}else{
-	    System.out.println("Error: unknown command");
+
+	public void run() {
+		System.out.println("Started...");
+
+		filters.add(new ContentExtractor());
+
+		drawGUI();
+
+		new ProxyListener(filters, 4000).start();
 	}
-    }
-}

+	public void drawGUI() {
+		proxyWindow = new JFrame("Crunch 1.0");
+		proxyWindow.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+		settingsTabs = new JTabbedPane();
+		Iterator filterIter = filters.listIterator(0);
+		while (filterIter.hasNext()) {
+			ProxyFilter current = (ProxyFilter) (filterIter.next());
+			settingsTabs.add(
+				current.getSettingsGUI().getTabName(),
+				new JScrollPane(current.getSettingsGUI()));
+		}
+
+		proxyWindow.getContentPane().add(settingsTabs, BorderLayout.CENTER);
+
+		buttonPanel = new JPanel();
+		buttonPanel.setLayout(new BoxLayout(buttonPanel, BoxLayout.X_AXIS));
+		buttonPanel.add(Box.createHorizontalGlue());
+		commitButton = new JButton("Commit Changes");
+		commitButton.addActionListener(this);
+		buttonPanel.add(commitButton);
+		cancelButton = new JButton("Cancel Changes");
+		cancelButton.addActionListener(this);
+		buttonPanel.add(cancelButton);
+
+		proxyWindow.getContentPane().add(buttonPanel, BorderLayout.SOUTH);

+		proxyWindow.pack();
+		proxyWindow.show();
+	}
+
+	public void actionPerformed(ActionEvent e) {
+		Object command = e.getSource();
+		if (command == commitButton) {
+			int index = settingsTabs.getSelectedIndex();
+			ProxyFilter filter = (ProxyFilter) (filters.get(index));
+			filter.getSettingsGUI().commitSettings();
+			System.out.println("Settings committed.");
+		} else if (command == cancelButton) {
+			int index = settingsTabs.getSelectedIndex();
+			ProxyFilter filter = (ProxyFilter) (filters.get(index));
+			filter.getSettingsGUI().revertSettings();
+		} else {
+			System.out.println("Error: unknown command");
+		}
+	}
+}
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilter.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilter.java
index 5d45093..3bf4b46 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilter.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilter.java
@@ -1,8 +1,9 @@
+package psl.memento.pervasive.crunch;
+
 import java.io.*;
-import javax.swing.JPanel;

 public interface ProxyFilter {
-    public File process(File in) throws IOException;
-    public ProxyFilterSettings getSettingsGUI();
-    public String getContentType();
+	public File process(File in) throws IOException; // ProxyThread.filter() passes the returned File on to the next filter
+	public ProxyFilterSettings getSettingsGUI(); // panel shown as this filter's tab in the proxy window
+	public String getContentType(); // value ProxyThread.filter() stores in the response's "content-type" attribute
 }
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilterSettings.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilterSettings.java
index 33056b4..ee0bf46 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilterSettings.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyFilterSettings.java
@@ -1,7 +1,9 @@
+package psl.memento.pervasive.crunch;
+
 import javax.swing.JPanel;

-public abstract class ProxyFilterSettings extends JPanel{
-    public abstract void commitSettings();
-    public abstract void revertSettings();
-    public abstract String getTabName();
+public abstract class ProxyFilterSettings extends JPanel { // Swing panel through which a ProxyFilter's settings are edited
+	public abstract void commitSettings(); // called for the selected tab when "Commit Changes" is pressed
+	public abstract void revertSettings(); // called for the selected tab when "Cancel Changes" is pressed
+	public abstract String getTabName(); // title used for this panel's tab
 }
\ No newline at end of file
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyListener.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyListener.java
index 4d66263..8557ba5 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyListener.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyListener.java
@@ -1,108 +1,113 @@
+package psl.memento.pervasive.crunch;
+
 import java.net.Socket;
 import java.net.ServerSocket;
-import java.net.SocketException;
 import java.io.IOException;
 import java.util.LinkedList;

 /**
  * @author Peter Grimm
- **/
-public class ProxyListener extends Thread{
-    public static final int DEFAULT_PORT = 8080;
-    public static final int ACCEPT_TIMEOUT = 50;
-
-    private boolean loop;
-    private int port;
-    private ServerSocket serverSocket;
-    private LinkedList filters;
-
-    private LinkedList recycledSockets = new LinkedList();
-
-
-    public int getPort(){
-	return port;
-    }
-
-    /**
-     * @param listen_port the port the proxy should listen on.
-     **/
-    public ProxyListener(LinkedList ll, int listen_port){
-	super();
-
-	loop=true;
-
-	filters=ll;
-
-	port = listen_port;
-	try{
-	    serverSocket = new ServerSocket(port);
-	    serverSocket.setSoTimeout(ACCEPT_TIMEOUT);
-	    System.out.println("Listening on port "+listen_port);
-	}catch(IOException ioe){
-	    ioe.printStackTrace();
+ */
+public class ProxyListener extends Thread {
+	public static final int DEFAULT_PORT = 8080;
+	public static final int ACCEPT_TIMEOUT = 50;
+
+	private boolean loop;
+	private int port;
+	private ServerSocket serverSocket;
+	private LinkedList filters;
+
+	private LinkedList recycledSockets = new LinkedList();
+
+	public int getPort() { // the port given to the constructor (stored even if binding failed)
+		return port;
 	}
-    }
-
-    /**
-     * Creates a new ProxyListener Listening on the default port
-     * of 8080
-     **/
-    public ProxyListener(LinkedList ll){
-	this(ll, DEFAULT_PORT);
-    }
-
-
-
-    public void recycle(Socket s){
-	synchronized(recycledSockets){
-	    recycledSockets.add(s);
+
+	/**
+	 * @param ll the filter list; an iterator over it is handed to every ProxyThread
+	 * @param listen_port the port the proxy should listen on.
+	 */
+	public ProxyListener(LinkedList ll, int listen_port) {
+		super();
+
+		loop = true;
+
+		filters = ll;
+
+		port = listen_port;
+		try {
+			serverSocket = new ServerSocket(port);
+			serverSocket.setSoTimeout(ACCEPT_TIMEOUT); // accept() gives up after ACCEPT_TIMEOUT ms so run() can re-check its state
+			System.out.println("Listening on port " + listen_port);
+		} catch (IOException ioe) { // NOTE(review): failure is only printed; serverSocket may then be null when run()/halt() use it
+			ioe.printStackTrace();
+		}
 	}
-    }
-
-    /**
-     * Listens for connections and spawns worker threads for these connections
-     **/
-    public void run(){
-	//loop until explicitly told to shutdown
-	while(loop){
-	    try{
-		while(loop){//run until an exception is thrown
-		    //get recycled sockets (persistent connections)
-		    synchronized(recycledSockets){
-			while(recycledSockets.size()>0){
-			    Socket socket = (Socket)recycledSockets.removeFirst();
-			    new ProxyThread(this, socket, filters.listIterator(0)).run();
-			}
-		    }
-		    //get a socket from the serversocket
-		    try{
-			Socket socket = serverSocket.accept();
-			new ProxyThread(this, socket, filters.listIterator(0)).run();
-		    }catch(java.net.SocketTimeoutException ste){}
+
+	/**
+	 * Creates a new ProxyListener listening on DEFAULT_PORT (8080) with the given filter list.
+	 */
+	public ProxyListener(LinkedList ll) {
+		this(ll, DEFAULT_PORT);
+	}
+
+	public void recycle(Socket s) { // queues a socket that run() will service again on its next pass
+		synchronized (recycledSockets) { // same lock run() holds while draining the queue
+			recycledSockets.add(s);
 		}
-	    }catch(NullPointerException npe){
-	    }catch(Exception e){//other error
-		e.printStackTrace();
-	    }
 	}
-	try{
-	    serverSocket.close();
-	}catch(Exception e){
-	    e.printStackTrace();
+
+	/**
+	 * Accept loop: services recycled sockets, then waits up to ACCEPT_TIMEOUT ms for a new connection. NOTE(review): ProxyThread.run() is called directly (not start()), so each request is handled on this thread.
+	 */
+	public void run() {
+		//loop until explicitly told to shutdown
+		while (loop) {
+			try {
+				while (loop) { //run until an exception is thrown
+					//get recycled sockets (persistent connections)
+					synchronized (recycledSockets) { // held while each recycled socket is serviced, so recycle() blocks meanwhile
+						while (recycledSockets.size() > 0) {
+							Socket socket =
+								(Socket) recycledSockets.removeFirst();
+							new ProxyThread(
+								this,
+								socket,
+								filters.listIterator(0))
+								.run();
+						}
+					}
+					//get a socket from the serversocket
+					try {
+						Socket socket = serverSocket.accept();
+						new ProxyThread(this, socket, filters.listIterator(0))
+							.run();
+					} catch (java.net.SocketTimeoutException ste) { // no connection within the timeout; go round again
+					}
+				}
+			} catch (NullPointerException npe) { // swallowed silently; NOTE(review): a null serverSocket (failed bind) would land here on every pass
+			} catch (Exception e) { //other error
+				e.printStackTrace();
+			}
+		}
+		try {
+			serverSocket.close(); // may already have been closed by halt()
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
 	}
-    }
-
-    /**
-     * Closes the listening socket and halts the thread.
-     * Works by closing the server socket which causes the
-     * accept method to except, breaking the while loop.
-     **/
-    public void halt(){
-	loop=false;
-	try{
-	    serverSocket.close();
-	}catch(IOException ioe){
-
+
+	/**
+	 * Clears the loop flag and closes the listening socket. A blocked
+	 * accept() then throws, which ends the inner loop in run(); the outer
+	 * loop exits because the flag is now false.
+	 */
+	public void halt() {
+		loop = false; // NOTE(review): loop is a plain (non-volatile) field read by the listener thread
+		try {
+			serverSocket.close();
+		} catch (IOException ioe) {
+			// close failure is ignored; the loop flag has already been cleared
+		}
 	}
-    }
 }
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyThread.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyThread.java
index 26de79e..141a106 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyThread.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/ProxyThread.java
@@ -1,106 +1,109 @@
+package psl.memento.pervasive.crunch;
+
 import java.net.Socket;
 import java.util.Iterator;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.File;
 import java.io.IOException;
-public class ProxyThread extends Thread{
-    Socket socket;
-    Socket sSocket;
-    Iterator filters;
-    ProxyListener listener;
-    public ProxyThread(ProxyListener pl, Socket s, Iterator pf){
-	super();
-	listener = pl;
-	socket=s;
-	filters=pf;
-    }
-
-    public void run(){
-	try{
-	    //send request to server
-	    HttpStream clientStream = new HttpStream(socket);
-	    InputStream fromServer = sendServerRequest(clientStream);
-
-	    //get response from server
-	    HttpStream serverStream = new HttpStream(sSocket);
-
-	    //filter if text/html
-	    String type = serverStream.getAttribute("Content-Type");
-	    System.out.println("Detected content type = "+type);
-	    if(type!=null && -1 < type.toLowerCase().indexOf("text/html")){
-		filter(serverStream);
-	    }
-
-	    //send back to client
-	    System.out.println("Getting client output stream...");
-	    OutputStream outputStream = socket.getOutputStream();
-	    System.out.println("Sending header...");
-	    serverStream.sendHeaderToStream(outputStream);
-	    System.out.println("Sending content...");
-	    serverStream.sendContentToStream(outputStream);
-	    System.out.println("Done.");
-
-	    //close the server socket
-	    sSocket.close();
-	    socket.close();
-	    //check to see if the client socket should be recycled
-	    /*
-	      String keepalive = clientStream.getAttribute("proxy-connection");
-	      if(keepalive!=null && keepalive.toLowerCase().equals("keep-alive")){
-	      listener.recycle(socket);
-	      }
-	    */
-
-	}catch(ReadTimeoutException rte){
-	    try{socket.close();}
-	    catch(Exception ex){}
-	    try{sSocket.close();}
-	    catch(Exception ex){}
-	    System.out.println("Transfer Timeout Occurred.");
-	}catch(Exception e){
-	    try{socket.close();}
-	    catch(Exception ex){}
-	    try{sSocket.close();}
-	    catch(Exception ex){}
-	    e.printStackTrace();
-	}
-    }
-
-    public InputStream sendServerRequest(HttpStream http) throws Exception{
-	String host = http.getAttribute("Host");
-	sSocket = new Socket(host, 80);
-	OutputStream out = sSocket.getOutputStream();
-	http.sendHeaderToStream(out);
-	//http.sendContentToStream(out);
-	return sSocket.getInputStream();
-    }
-
-    public void filter(HttpStream http) throws IOException{
-	File workingFile = null;
-	workingFile = http.downloadToFile();
-	while(filters.hasNext()){
-	    try{
-		ProxyFilter filter = (ProxyFilter)(filters.next());
-		System.out.println("Started filtering...");
-		workingFile.deleteOnExit();
-		workingFile = filter.process(workingFile);
-		workingFile.deleteOnExit();
-		http.setAttribute("content-type", filter.getContentType());
-		System.out.println("Done filtering.");
-	    }catch(Exception e){
-		e.printStackTrace();
-	    }
+public class ProxyThread extends Thread {
+	Socket socket;
+	Socket sSocket;
+	Iterator filters;
+	ProxyListener listener;
+	public ProxyThread(ProxyListener pl, Socket s, Iterator pf) { // pl: owning listener; s: client socket; pf: iterator over the ProxyFilters to apply
+		super();
+		listener = pl;
+		socket = s;
+		filters = pf;
 	}
-	http.replaceContentWithFile(workingFile);
-	System.out.println("content replaced");
-    }
-}

+	public void run() { // relays one request: client -> server, optional filtering of the response, server -> client
+		try {
+			//send request to server
+			HttpStream clientStream = new HttpStream(socket);
+			InputStream fromServer = sendServerRequest(clientStream); // also opens sSocket; fromServer itself is not used below

+			//get response from server
+			HttpStream serverStream = new HttpStream(sSocket);

+			//filter if text/html
+			String type = serverStream.getAttribute("Content-Type");
+			System.out.println("Detected content type = " + type);
+			if (type != null && -1 < type.toLowerCase().indexOf("text/html")) {
+				filter(serverStream);
+			}

+			//send back to client
+			System.out.println("Getting client output stream...");
+			OutputStream outputStream = socket.getOutputStream();
+			System.out.println("Sending header...");
+			serverStream.sendHeaderToStream(outputStream);
+			System.out.println("Sending content...");
+			serverStream.sendContentToStream(outputStream);
+			System.out.println("Done.");

+			//close both the server socket and the client socket
+			sSocket.close();
+			socket.close();
+			//socket recycling is currently disabled (code kept below for reference)
+			/*
+			 * String keepalive =
+			 * clientStream.getAttribute("proxy-connection");
+			 * if(keepalive!=null &&
+			 * keepalive.toLowerCase().equals("keep-alive")){
+			 * listener.recycle(socket); }
+			 */

+		} catch (ReadTimeoutException rte) {
+			try {
+				socket.close();
+			} catch (Exception ex) { // best-effort cleanup; failure ignored
+			}
+			try {
+				sSocket.close(); // sSocket is still null if the server connect never happened
+			} catch (Exception ex) { // best-effort cleanup; also absorbs the null case above
+			}
+			System.out.println("Transfer Timeout Occurred.");
+		} catch (Exception e) {
+			try {
+				socket.close();
+			} catch (Exception ex) { // best-effort cleanup; failure ignored
+			}
+			try {
+				sSocket.close(); // sSocket is still null if the server connect never happened
+			} catch (Exception ex) { // best-effort cleanup; also absorbs the null case above
+			}
+			e.printStackTrace();
+		}
+	}

+	public InputStream sendServerRequest(HttpStream http) throws Exception { // opens sSocket to the request's host and forwards the request header
+		String host = http.getAttribute("Host");
+		sSocket = new Socket(host, 80); // port is always 80 here
+		OutputStream out = sSocket.getOutputStream();
+		http.sendHeaderToStream(out); // NOTE(review): only the header is forwarded; the content call below is commented out
+		//http.sendContentToStream(out);
+		return sSocket.getInputStream();
+	}

+	public void filter(HttpStream http) throws IOException { // downloads the response content to a file and passes it through every filter in order
+		File workingFile = null;
+		workingFile = http.downloadToFile();
+		while (filters.hasNext()) {
+			try {
+				ProxyFilter filter = (ProxyFilter) (filters.next());
+				System.out.println("Started filtering...");
+				workingFile.deleteOnExit(); // mark the filter's input file for deletion at JVM exit
+				workingFile = filter.process(workingFile);
+				workingFile.deleteOnExit(); // and the filter's output file as well
+				http.setAttribute("content-type", filter.getContentType());
+				System.out.println("Done filtering.");
+			} catch (Exception e) { // a failing filter is reported and skipped; workingFile keeps its previous value
+				e.printStackTrace();
+			}
+		}
+		http.replaceContentWithFile(workingFile);
+		System.out.println("content replaced");
+	}
+}
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/ReadTimeoutException.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/ReadTimeoutException.java
index 65e6e0f..99586da 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/ReadTimeoutException.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/ReadTimeoutException.java
@@ -1,11 +1,13 @@
+package psl.memento.pervasive.crunch;
+
 /**
  * Signals that a timeout has occurred.
- **/
-public class ReadTimeoutException extends Exception{
-    public ReadTimeoutException(){
-	super();
-    }
-    public ReadTimeoutException(String s){
-	super(s);
-    }
+ */
+public class ReadTimeoutException extends Exception { // checked exception; ProxyThread.run() catches it and reports a transfer timeout
+	public ReadTimeoutException() { // no detail message
+		super();
+	}
+	public ReadTimeoutException(String s) { // s is passed to Exception as the detail message
+		super(s);
+	}
 }
diff --git a/pervasive/crunch/src/psl/memento/pervasive/crunch/SettingsEditor.java b/pervasive/crunch/src/psl/memento/pervasive/crunch/SettingsEditor.java
index fc87e81..142509d 100644
--- a/pervasive/crunch/src/psl/memento/pervasive/crunch/SettingsEditor.java
+++ b/pervasive/crunch/src/psl/memento/pervasive/crunch/SettingsEditor.java
@@ -1,867 +1,1064 @@
-import javax.swing.*;
-import java.awt.Rectangle;
+package psl.memento.pervasive.crunch;
+
 import java.awt.event.*;

 /**
  * SettingsEditor.java
- * @author  David Neistadt
+ *
+ * @author David Neistadt
  */
 public class SettingsEditor extends ProxyFilterSettings {
-    //instance variables
-    ContentExtractor mFilter;
-
-    //final variables
-    private final String TAB_NAME = "Extractor Settings";
-
-    /** Creates new form SettingsEditor
-     * @param iFilter the ContentExtractor to change the settings of.
-     */
-    public SettingsEditor(ContentExtractor iFilter) {
-        initComponents();
-        mFilter = iFilter;
-        loadSettings();
-    }
-
-    /** This method is called from within the constructor to
-     * initialize the form.
-     * WARNING: Do NOT modify this code. The content of this method is
-     * always regenerated by the Form Editor.
-     */
-    private void initComponents() {//GEN-BEGIN:initComponents
-        java.awt.GridBagConstraints gridBagConstraints;
-
-        buttonGroupOutput = new javax.swing.ButtonGroup();
-        settingsTabs = new javax.swing.JTabbedPane();
-        ignorePanel = new javax.swing.JPanel();
-        ignoreAdsCheck = new javax.swing.JCheckBox();
-        ignoreScriptsCheck = new javax.swing.JCheckBox();
-        ignoreNoscriptCheck = new javax.swing.JCheckBox();
-        ignoreStylesCheck = new javax.swing.JCheckBox();
-        ignoreStyleInDivCheck = new javax.swing.JCheckBox();
-        ignoreImagesCheck = new javax.swing.JCheckBox();
-        ignoreTextLinksCheck = new javax.swing.JCheckBox();
-        ignoreImageLinksCheck = new javax.swing.JCheckBox();
-        ignoreFormsCheck = new javax.swing.JCheckBox();
-        ignoreMetaCheck = new javax.swing.JCheckBox();
-        ignoreInputCheck = new javax.swing.JCheckBox();
-        ignoreButtonCheck = new javax.swing.JCheckBox();
-        ignoreSelectCheck = new javax.swing.JCheckBox();
-        ignoreTableCellWidthsCheck = new javax.swing.JCheckBox();
-        ignoreIframeCheck = new javax.swing.JCheckBox();
-        displayAltTagsCheck = new javax.swing.JCheckBox();
-        displayImageLinkAltsCheck = new javax.swing.JCheckBox();
-        ignoreEmbedCheck = new javax.swing.JCheckBox();
-        advancedPanel = new javax.swing.JPanel();
-        advancedLayoutPanel = new javax.swing.JPanel();
-        ignoreLinkListsCheck = new javax.swing.JCheckBox();
-        ignoreLLTextLinksCheck = new javax.swing.JCheckBox();
-        ignoreOnlyTextAndLinksCheck = new javax.swing.JCheckBox();
-        linkTextRatioLabel = new javax.swing.JLabel();
-        linkTextRatio = new javax.swing.JTextField();
-        ignoreLLImageLinksCheck = new javax.swing.JCheckBox();
-        removeEmptyTablesCheck = new javax.swing.JCheckBox();
-        substanceLabel = new javax.swing.JLabel();
-        substanceImageCheck = new javax.swing.JCheckBox();
-        substanceLinksCheck = new javax.swing.JCheckBox();
-        substanceInputCheck = new javax.swing.JCheckBox();
-        substanceSelectCheck = new javax.swing.JCheckBox();
-        minimumTextLengthLabel = new javax.swing.JLabel();
-        minimumTextLengthField = new javax.swing.JTextField();
-        substanceIFrameCheck = new javax.swing.JCheckBox();
-        substanceFormCheck = new javax.swing.JCheckBox();
-        substanceButtonCheck = new javax.swing.JCheckBox();
-        substanceTextareaCheck = new javax.swing.JCheckBox();
-        jSeparator1 = new javax.swing.JSeparator();
-        outputPanel = new javax.swing.JPanel();
-        outputFormatPanel = new javax.swing.JPanel();
-        htmlOutputButton = new javax.swing.JRadioButton();
-        textOutputButton = new javax.swing.JRadioButton();
-        htmlOutputPanel = new javax.swing.JPanel();
-        appendLinksCheck = new javax.swing.JCheckBox();
-        textOutputPanel = new javax.swing.JPanel();
-        limitLineBreaksCheck = new javax.swing.JCheckBox();
-        maxLineBreaksLabel = new javax.swing.JLabel();
-        maxLineBreaksField = new javax.swing.JTextField();
-
-        setLayout(new java.awt.BorderLayout());
-
-        setMaximumSize(new java.awt.Dimension(445, 392));
-        settingsTabs.setTabPlacement(javax.swing.JTabbedPane.BOTTOM);
-        ignorePanel.setLayout(new java.awt.GridBagLayout());
-
-        ignoreAdsCheck.setText("Ignore All Advertisements");
-        ignoreAdsCheck.setAlignmentY(0.0F);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 0;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.ipadx = 9;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreAdsCheck, gridBagConstraints);
-
-        ignoreScriptsCheck.setText("Ignore Scripts");
-        ignoreScriptsCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 1;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.ipadx = 55;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreScriptsCheck, gridBagConstraints);
-
-        ignoreNoscriptCheck.setText("Enable <NOSCIPT> tags");
-        ignoreNoscriptCheck.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 4;
-        gridBagConstraints.gridy = 1;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.ipadx = 11;
-        ignorePanel.add(ignoreNoscriptCheck, gridBagConstraints);
-
-        ignoreStylesCheck.setText("Ignore Styles");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 2;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreStylesCheck, gridBagConstraints);
-
-        ignoreStyleInDivCheck.setText("Ignore Style Attribute in <DIV> tags");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 3;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreStyleInDivCheck, gridBagConstraints);
-
-        ignoreImagesCheck.setText("Ignore Non-Link Images");
-        ignoreImagesCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 4;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreImagesCheck, gridBagConstraints);
-
-        ignoreTextLinksCheck.setText("Ignore Text Links");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 6;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreTextLinksCheck, gridBagConstraints);
-
-        ignoreImageLinksCheck.setText("Ignore Image Links");
-        ignoreImageLinksCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 5;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreImageLinksCheck, gridBagConstraints);
-
-        ignoreFormsCheck.setText("Ignore Forms");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 12;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreFormsCheck, gridBagConstraints);
-
-        ignoreMetaCheck.setText("Ignore <META> tags");
-        ignoreMetaCheck.addActionListener(new java.awt.event.ActionListener() {
-            public void actionPerformed(java.awt.event.ActionEvent evt) {
-                ignoreMetaCheckActionPerformed(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 16;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreMetaCheck, gridBagConstraints);
-
-        ignoreInputCheck.setText("Ignore <INPUT> tags");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 13;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreInputCheck, gridBagConstraints);
-
-        ignoreButtonCheck.setText("Ignore <BUTTON> tags");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 14;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreButtonCheck, gridBagConstraints);
-
-        ignoreSelectCheck.setText("Ignore <SELECT> tags");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 15;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreSelectCheck, gridBagConstraints);
-
-        ignoreTableCellWidthsCheck.setText("Ignore Table Cell Widths");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 18;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreTableCellWidthsCheck, gridBagConstraints);
-
-        ignoreIframeCheck.setText("Ignore <IFRAME> tags");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 17;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreIframeCheck, gridBagConstraints);
-
-        displayAltTagsCheck.setText("Display ALT Links");
-        displayAltTagsCheck.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 4;
-        gridBagConstraints.gridy = 4;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        ignorePanel.add(displayAltTagsCheck, gridBagConstraints);
-
-        displayImageLinkAltsCheck.setText("Display ALT Links");
-        displayImageLinkAltsCheck.setEnabled(false);
-        displayImageLinkAltsCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 4;
-        gridBagConstraints.gridy = 5;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        ignorePanel.add(displayImageLinkAltsCheck, gridBagConstraints);
-
-        ignoreEmbedCheck.setText("Ignore <EMBED> tags");
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 19;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        ignorePanel.add(ignoreEmbedCheck, gridBagConstraints);
-
-        settingsTabs.addTab("Ignore Settings", ignorePanel);
-
-        advancedLayoutPanel.setLayout(new java.awt.GridBagLayout());
-
-        ignoreLinkListsCheck.setText("Ignore Link Lists");
-        ignoreLinkListsCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 0;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        advancedLayoutPanel.add(ignoreLinkListsCheck, gridBagConstraints);
-
-        ignoreLLTextLinksCheck.setText("Text Links");
-        ignoreLLTextLinksCheck.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 1;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(ignoreLLTextLinksCheck, gridBagConstraints);
-
-        ignoreOnlyTextAndLinksCheck.setText("Ignore Only Text and Links");
-        ignoreOnlyTextAndLinksCheck.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 3;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(ignoreOnlyTextAndLinksCheck, gridBagConstraints);
-
-        linkTextRatioLabel.setText("Link/Text Removal Ratio");
-        linkTextRatioLabel.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 4;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 10);
-        advancedLayoutPanel.add(linkTextRatioLabel, gridBagConstraints);
-
-        linkTextRatio.setText("0.25");
-        linkTextRatio.setPreferredSize(new java.awt.Dimension(55, 20));
-        linkTextRatio.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 1;
-        gridBagConstraints.gridy = 4;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.ipadx = 50;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 6);
-        advancedLayoutPanel.add(linkTextRatio, gridBagConstraints);
-
-        ignoreLLImageLinksCheck.setText("Image Links");
-        ignoreLLImageLinksCheck.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 2;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(ignoreLLImageLinksCheck, gridBagConstraints);
-
-        removeEmptyTablesCheck.setText("Remove Empty Tables");
-        removeEmptyTablesCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 6;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
-        advancedLayoutPanel.add(removeEmptyTablesCheck, gridBagConstraints);
-
-        substanceLabel.setText("Tags to Consider as Substance:");
-        substanceLabel.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 7;
-        gridBagConstraints.gridwidth = 3;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(substanceLabel, gridBagConstraints);
-
-        substanceImageCheck.setText("<IMG>");
-        substanceImageCheck.setEnabled(false);
-        substanceImageCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 8;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(substanceImageCheck, gridBagConstraints);
-
-        substanceLinksCheck.setText("<A>");
-        substanceLinksCheck.setEnabled(false);
-        substanceLinksCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 9;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(substanceLinksCheck, gridBagConstraints);
-
-        substanceInputCheck.setText("<INPUT>");
-        substanceInputCheck.setEnabled(false);
-        substanceInputCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 10;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(substanceInputCheck, gridBagConstraints);
-
-        substanceSelectCheck.setText("<SELECT>");
-        substanceSelectCheck.setEnabled(false);
-        substanceSelectCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 11;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
-        advancedLayoutPanel.add(substanceSelectCheck, gridBagConstraints);
-
-        minimumTextLengthLabel.setHorizontalAlignment(javax.swing.SwingConstants.RIGHT);
-        minimumTextLengthLabel.setText("Minimum Text Length");
-        minimumTextLengthLabel.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 12;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.EAST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 10);
-        advancedLayoutPanel.add(minimumTextLengthLabel, gridBagConstraints);
-
-        minimumTextLengthField.setText("1");
-        minimumTextLengthField.setEnabled(false);
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 1;
-        gridBagConstraints.gridy = 12;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        gridBagConstraints.ipadx = 50;
-        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
-        gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 6);
-        advancedLayoutPanel.add(minimumTextLengthField, gridBagConstraints);
-
-        substanceIFrameCheck.setText("<IFRAME>");
-        substanceIFrameCheck.setEnabled(false);
-        substanceIFrameCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 1;
-        gridBagConstraints.gridy = 11;
-        gridBagConstraints.gridwidth = 2;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        advancedLayoutPanel.add(substanceIFrameCheck, gridBagConstraints);
-
-        substanceFormCheck.setText("<FORM>");
-        substanceFormCheck.setEnabled(false);
-        substanceFormCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 1;
-        gridBagConstraints.gridy = 10;
-        gridBagConstraints.gridwidth = 2;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        advancedLayoutPanel.add(substanceFormCheck, gridBagConstraints);
-
-        substanceButtonCheck.setText("<BUTTON>");
-        substanceButtonCheck.setEnabled(false);
-        substanceButtonCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 1;
-        gridBagConstraints.gridy = 9;
-        gridBagConstraints.gridwidth = 2;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        advancedLayoutPanel.add(substanceButtonCheck, gridBagConstraints);
-
-        substanceTextareaCheck.setText("<TEXTAREA>");
-        substanceTextareaCheck.setEnabled(false);
-        substanceTextareaCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 1;
-        gridBagConstraints.gridy = 8;
-        gridBagConstraints.gridwidth = 2;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
-        advancedLayoutPanel.add(substanceTextareaCheck, gridBagConstraints);
-
-        gridBagConstraints = new java.awt.GridBagConstraints();
-        gridBagConstraints.gridx = 0;
-        gridBagConstraints.gridy = 5;
-        gridBagConstraints.gridwidth = java.awt.GridBagConstraints.REMAINDER;
-        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
-        gridBagConstraints.ipadx = 430;
-        gridBagConstraints.insets = new java.awt.Insets(6, 0, 0, 0);
-        advancedLayoutPanel.add(jSeparator1, gridBagConstraints);
-
-        advancedPanel.add(advancedLayoutPanel);
-
-        settingsTabs.addTab("Advanced Settings", null, advancedPanel, "null");
-
-        outputPanel.setLayout(new javax.swing.BoxLayout(outputPanel, javax.swing.BoxLayout.Y_AXIS));
-
-        outputFormatPanel.setLayout(new javax.swing.BoxLayout(outputFormatPanel, javax.swing.BoxLayout.Y_AXIS));
-
-        outputFormatPanel.setBorder(new javax.swing.border.TitledBorder("Output Format"));
-        outputFormatPanel.setAlignmentX(0.0F);
-        htmlOutputButton.setSelected(true);
-        htmlOutputButton.setText("HTML only");
-        buttonGroupOutput.add(htmlOutputButton);
-        htmlOutputButton.setHorizontalAlignment(javax.swing.SwingConstants.CENTER);
-        htmlOutputButton.setMaximumSize(new java.awt.Dimension(10000, 24));
-        outputFormatPanel.add(htmlOutputButton);
-
-        textOutputButton.setText("Text only");
-        buttonGroupOutput.add(textOutputButton);
-        textOutputButton.setHorizontalAlignment(javax.swing.SwingConstants.CENTER);
-        textOutputButton.setMaximumSize(new java.awt.Dimension(10000, 24));
-        outputFormatPanel.add(textOutputButton);
-
-        outputPanel.add(outputFormatPanel);
-
-        htmlOutputPanel.setLayout(new java.awt.FlowLayout(java.awt.FlowLayout.LEFT));
-
-        htmlOutputPanel.setBorder(new javax.swing.border.TitledBorder("HTML Output Settings"));
-        htmlOutputPanel.setAlignmentX(0.0F);
-        htmlOutputPanel.setMinimumSize(new java.awt.Dimension(100, 59));
-        htmlOutputPanel.setPreferredSize(new java.awt.Dimension(100, 59));
-        appendLinksCheck.setText("Append Links to Bottom of Page");
-        htmlOutputPanel.add(appendLinksCheck);
-
-        outputPanel.add(htmlOutputPanel);
-
-        textOutputPanel.setLayout(null);
-
-        textOutputPanel.setBorder(new javax.swing.border.TitledBorder("Text Output Settings"));
-        textOutputPanel.setAlignmentX(0.0F);
-        textOutputPanel.setPreferredSize(new java.awt.Dimension(0, 240));
-        limitLineBreaksCheck.setText("Limit number of line breaks");
-        limitLineBreaksCheck.addItemListener(new java.awt.event.ItemListener() {
-            public void itemStateChanged(java.awt.event.ItemEvent evt) {
-                selectionEnabler(evt);
-            }
-        });
-
-        textOutputPanel.add(limitLineBreaksCheck);
-        limitLineBreaksCheck.setBounds(10, 25, 180, 24);
-
-        maxLineBreaksLabel.setText("Maximum number of line breaks");
-        textOutputPanel.add(maxLineBreaksLabel);
-        maxLineBreaksLabel.setBounds(10, 50, 183, 16);
-
-        maxLineBreaksField.setColumns(5);
-        maxLineBreaksField.setText("2");
-        textOutputPanel.add(maxLineBreaksField);
-        maxLineBreaksField.setBounds(200, 50, 55, 20);
-
-        outputPanel.add(textOutputPanel);
-
-        settingsTabs.addTab("Output Settings", outputPanel);
-
-        add(settingsTabs, java.awt.BorderLayout.WEST);
-
-    }//GEN-END:initComponents
-
-    private void ignoreMetaCheckActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ignoreMetaCheckActionPerformed
-        // Add your handling code here:
-    }//GEN-LAST:event_ignoreMetaCheckActionPerformed
-
-    private void selectionEnabler(java.awt.event.ItemEvent evt) {//GEN-FIRST:event_selectionEnabler
-        // Check out what is the source
-        Object source = evt.getItemSelectable();
-
-        if (source == ignoreScriptsCheck) {
-            if (evt.getStateChange() == ItemEvent.DESELECTED)
-                ignoreNoscriptCheck.setEnabled(false);
-            else
-                ignoreNoscriptCheck.setEnabled(true);
-        }//if
-        else if (source == ignoreLinkListsCheck) {
-            if (evt.getStateChange() == ItemEvent.DESELECTED) {
-                ignoreLLTextLinksCheck.setEnabled(false);
-                ignoreLLImageLinksCheck.setEnabled(false);
-                ignoreOnlyTextAndLinksCheck.setEnabled(false);
-                linkTextRatio.setEnabled(false);
-                linkTextRatioLabel.setEnabled(false);
-            }//if
-            else {
-                ignoreLLTextLinksCheck.setEnabled(true);
-                ignoreLLImageLinksCheck.setEnabled(true);
-                ignoreOnlyTextAndLinksCheck.setEnabled(true);
-                linkTextRatio.setEnabled(true);
-                linkTextRatioLabel.setEnabled(true);
-            }//else
-        }//else if
-        else if (source == ignoreImagesCheck) {
-            if (evt.getStateChange() == ItemEvent.DESELECTED) {
-                displayAltTagsCheck.setEnabled(false);
-            }//if
-            else {
-                displayAltTagsCheck.setEnabled(true);
-            }
-        }
-        else if (source == ignoreImageLinksCheck) {
-            if (evt.getStateChange() == ItemEvent.DESELECTED)
-                displayImageLinkAltsCheck.setEnabled(false);
-            else
-                displayImageLinkAltsCheck.setEnabled(true);
-        }
-        else if (source == removeEmptyTablesCheck) {
-            if (evt.getStateChange() == ItemEvent.DESELECTED) {
-                substanceImageCheck.setEnabled(false);
-                substanceLinksCheck.setEnabled(false);
-                substanceInputCheck.setEnabled(false);
-                substanceSelectCheck.setEnabled(false);
-                substanceTextareaCheck.setEnabled(false);
-                substanceButtonCheck.setEnabled(false);
-                substanceFormCheck.setEnabled(false);
-                substanceIFrameCheck.setEnabled(false);
-                substanceLabel.setEnabled(false);
-                minimumTextLengthLabel.setEnabled(false);
-                minimumTextLengthField.setEnabled(false);
-            }//if
-            else {
-                substanceImageCheck.setEnabled(true);
-                substanceLinksCheck.setEnabled(true);
-                substanceInputCheck.setEnabled(true);
-                substanceSelectCheck.setEnabled(true);
-                substanceTextareaCheck.setEnabled(true);
-                substanceButtonCheck.setEnabled(true);
-                substanceFormCheck.setEnabled(true);
-                substanceIFrameCheck.setEnabled(true);
-                substanceLabel.setEnabled(true);
-                minimumTextLengthLabel.setEnabled(true);
-                minimumTextLengthField.setEnabled(true);
-            }//else
-        }//else if
-        else if (source == limitLineBreaksCheck) {
-            if (evt.getStateChange() == ItemEvent.DESELECTED) {
-                maxLineBreaksLabel.setEnabled(false);
-                maxLineBreaksField.setEnabled(false);
-            }
-            else {
-                maxLineBreaksLabel.setEnabled(true);
-                maxLineBreaksField.setEnabled(true);
-            }//if
-        }
-    }//GEN-LAST:event_selectionEnabler
-
-    /**
-     * Reloads settings GUI
-     * @param iFilter the ContentExtractor
-     */
-    public void reload(ContentExtractor iFilter) {
-	mFilter = iFilter;
-	loadSettings();
-    }
-
-    /**
-     * Loads the settings from the ContentExtractor by using accessor methods
-     */
-    public void loadSettings() {
-        //Use do click to run listeners and setSelected to just set things
-
-        if (mFilter.getSetting(ContentExtractor.ONLY_TEXT).equals("true")) textOutputButton.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_ADS).equals("true")) ignoreAdsCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_BUTTON_TAGS).equals("true")) ignoreButtonCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_FORMS).equals("true")) ignoreFormsCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_IFRAME_TAGS).equals("true")) ignoreIframeCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_IMAGE_LINKS).equals("true")) ignoreImageLinksCheck.doClick();;
-        if (mFilter.getSetting(ContentExtractor.DISPLAY_IMAGE_LINK_ALTS).equals("true")) displayImageLinkAltsCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_IMAGES).equals("true")) ignoreImagesCheck.doClick();
-        if (mFilter.getSetting(ContentExtractor.IGNORE_INPUT_TAGS).equals("true")) ignoreInputCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_LINK_CELLS).equals("true")) ignoreLinkListsCheck.doClick();
-        if (mFilter.getSetting(ContentExtractor.LC_IGNORE_IMAGE_LINKS).equals("true")) ignoreLLImageLinksCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.LC_IGNORE_TEXT_LINKS).equals("true")) ignoreLLTextLinksCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_META).equals("true")) ignoreMetaCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_SCRIPTS).equals("true")) ignoreScriptsCheck.doClick();
-        if (mFilter.getSetting(ContentExtractor.IGNORE_NOSCRIPT_TAGS).equals("true")) ignoreNoscriptCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_SELECT_TAGS).equals("true")) ignoreSelectCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_DIV_STYLES).equals("true")) ignoreStyleInDivCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_STYLES).equals("true")) ignoreStylesCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_CELL_WIDTH).equals("true")) ignoreTableCellWidthsCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_TEXT_LINKS).equals("true")) ignoreTextLinksCheck.setSelected(true);
-        linkTextRatio.setText(mFilter.getSetting(ContentExtractor.LINK_TEXT_REMOVAL_RATIO));
-        if (mFilter.getSetting(ContentExtractor.DISPLAY_IMAGE_ALTS).equals("true")) displayAltTagsCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.LC_ONLY_LINKS_AND_TEXT).equals("true")) ignoreOnlyTextAndLinksCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.REMOVE_EMPTY_TABLES).equals("true")) removeEmptyTablesCheck.doClick();
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_BUTTON).equals("true")) substanceButtonCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_FORM).equals("true")) substanceFormCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_IFRAME).equals("true")) substanceIFrameCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_IMAGE).equals("true")) substanceImageCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_INPUT).equals("true")) substanceInputCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_LINKS).equals("true")) substanceLinksCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_SELECT).equals("true")) substanceSelectCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.SUBSTANCE_TEXTAREA).equals("true")) substanceTextareaCheck.setSelected(true);
-        minimumTextLengthField.setText(mFilter.getSetting(ContentExtractor.SUBSTANCE_MIN_TEXT_LENGTH));
-        if (mFilter.getSetting(ContentExtractor.ADD_LINKS_TO_BOTTOM).equals("true")) appendLinksCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.IGNORE_EMBED_TAGS).equals("true")) ignoreEmbedCheck.setSelected(true);
-        if (mFilter.getSetting(ContentExtractor.LIMIT_LINEBREAKS).equals("true")) limitLineBreaksCheck.doClick();
-        maxLineBreaksField.setText(mFilter.getSetting(ContentExtractor.MAX_LINEBREAKS));
-    }
-
-    /**
-     * Commits the settings so that the ContentExtractor reflects the user's
-     * specifications.
-     */
-    public void commitSettings() {
-        mFilter.changeSetting(ContentExtractor.ONLY_TEXT, Boolean.toString(textOutputButton.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_ADS, Boolean.toString(ignoreAdsCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_BUTTON_TAGS, Boolean.toString(ignoreButtonCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_FORMS, Boolean.toString(ignoreFormsCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_IFRAME_TAGS, Boolean.toString(ignoreIframeCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_IMAGE_LINKS, Boolean.toString(ignoreImageLinksCheck.isSelected()));
-        if (ignoreImageLinksCheck.isSelected())
-            mFilter.changeSetting(ContentExtractor.DISPLAY_IMAGE_LINK_ALTS, Boolean.toString(displayImageLinkAltsCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_TEXT_LINKS, Boolean.toString(ignoreTextLinksCheck.isSelected()));
-
-        if (ignoreImagesCheck.isSelected()) {
-            mFilter.changeSetting(ContentExtractor.DISPLAY_IMAGE_ALTS, Boolean.toString(displayAltTagsCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.IGNORE_IMAGES, Boolean.toString(ignoreImagesCheck.isSelected()));
-        }
-        else
-            mFilter.changeSetting(ContentExtractor.IGNORE_IMAGES, Boolean.toString(ignoreImagesCheck.isSelected()));
-
-        mFilter.changeSetting(ContentExtractor.IGNORE_INPUT_TAGS, Boolean.toString(ignoreInputCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_LINK_CELLS, Boolean.toString(ignoreLinkListsCheck.isSelected()));
-
-        if (ignoreLinkListsCheck.isSelected()) {
-            mFilter.changeSetting(ContentExtractor.LC_IGNORE_IMAGE_LINKS, Boolean.toString(ignoreLLImageLinksCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.LC_IGNORE_TEXT_LINKS, Boolean.toString(ignoreLLTextLinksCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.LINK_TEXT_REMOVAL_RATIO, linkTextRatio.getText());
-            mFilter.changeSetting(ContentExtractor.LC_ONLY_LINKS_AND_TEXT, Boolean.toString(ignoreOnlyTextAndLinksCheck.isSelected()));
-        }
-
-        mFilter.changeSetting(ContentExtractor.IGNORE_META, Boolean.toString(ignoreMetaCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_SCRIPTS, Boolean.toString(ignoreScriptsCheck.isSelected()));
-
-        if (ignoreNoscriptCheck.isSelected())
-            mFilter.changeSetting(ContentExtractor.IGNORE_NOSCRIPT_TAGS, Boolean.toString(ignoreNoscriptCheck.isSelected()));
-
-        mFilter.changeSetting(ContentExtractor.IGNORE_SELECT_TAGS, Boolean.toString(ignoreSelectCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_DIV_STYLES, Boolean.toString(ignoreStyleInDivCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_STYLES, Boolean.toString(ignoreStylesCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.IGNORE_CELL_WIDTH, Boolean.toString(ignoreTableCellWidthsCheck.isSelected()));
-
-        if (removeEmptyTablesCheck.isSelected()) {
-            mFilter.changeSetting(ContentExtractor.REMOVE_EMPTY_TABLES, Boolean.toString(removeEmptyTablesCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_BUTTON, Boolean.toString(substanceButtonCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_FORM, Boolean.toString(substanceFormCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_IFRAME, Boolean.toString(substanceIFrameCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_IMAGE, Boolean.toString(substanceImageCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_INPUT, Boolean.toString(substanceInputCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_LINKS, Boolean.toString(substanceLinksCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_SELECT, Boolean.toString(substanceSelectCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_TEXTAREA, Boolean.toString(substanceTextareaCheck.isSelected()));
-            mFilter.changeSetting(ContentExtractor.SUBSTANCE_MIN_TEXT_LENGTH, minimumTextLengthField.getText());
-        }//if
-        else
-            mFilter.changeSetting(ContentExtractor.REMOVE_EMPTY_TABLES, Boolean.toString(removeEmptyTablesCheck.isSelected()));
-
-        mFilter.changeSetting(ContentExtractor.IGNORE_EMBED_TAGS, Boolean.toString(ignoreEmbedCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.ADD_LINKS_TO_BOTTOM, Boolean.toString(appendLinksCheck.isSelected()));
-        mFilter.changeSetting(ContentExtractor.LIMIT_LINEBREAKS, Boolean.toString(limitLineBreaksCheck.isSelected()));
-        if (limitLineBreaksCheck.isSelected())
-            mFilter.changeSetting(ContentExtractor.MAX_LINEBREAKS, maxLineBreaksField.getText());
-
-        mFilter.saveSettings();
-    }
-
-    public String getTabName() {
-        return TAB_NAME;
-    }
-
-    public void revertSettings() {
-        this.removeAll();
-        initComponents();
-        loadSettings();
-        revalidate();
-    }
-
-    // Variables declaration - do not modify//GEN-BEGIN:variables
-    private javax.swing.JCheckBox ignoreLinkListsCheck;
-    private javax.swing.JCheckBox substanceImageCheck;
-    private javax.swing.JPanel advancedLayoutPanel;
-    private javax.swing.JSeparator jSeparator1;
-    private javax.swing.JTextField maxLineBreaksField;
-    private javax.swing.JCheckBox substanceInputCheck;
-    private javax.swing.JCheckBox ignoreNoscriptCheck;
-    private javax.swing.JLabel maxLineBreaksLabel;
-    private javax.swing.JCheckBox substanceButtonCheck;
-    private javax.swing.JLabel substanceLabel;
-    private javax.swing.JRadioButton textOutputButton;
-    private javax.swing.JCheckBox substanceLinksCheck;
-    private javax.swing.JCheckBox substanceTextareaCheck;
-    private javax.swing.JRadioButton htmlOutputButton;
-    private javax.swing.JCheckBox substanceSelectCheck;
-    private javax.swing.JCheckBox ignoreOnlyTextAndLinksCheck;
-    private javax.swing.JTextField minimumTextLengthField;
-    private javax.swing.JCheckBox ignoreImageLinksCheck;
-    private javax.swing.JCheckBox ignoreLLImageLinksCheck;
-    private javax.swing.JPanel htmlOutputPanel;
-    private javax.swing.JCheckBox substanceIFrameCheck;
-    private javax.swing.JLabel minimumTextLengthLabel;
-    private javax.swing.JPanel ignorePanel;
-    private javax.swing.JCheckBox ignoreScriptsCheck;
-    private javax.swing.JCheckBox ignoreMetaCheck;
-    private javax.swing.JCheckBox ignoreInputCheck;
-    private javax.swing.JLabel linkTextRatioLabel;
-    private javax.swing.ButtonGroup buttonGroupOutput;
-    private javax.swing.JCheckBox ignoreTextLinksCheck;
-    private javax.swing.JCheckBox ignoreLLTextLinksCheck;
-    private javax.swing.JCheckBox limitLineBreaksCheck;
-    private javax.swing.JCheckBox ignoreButtonCheck;
-    private javax.swing.JTabbedPane settingsTabs;
-    private javax.swing.JCheckBox ignoreImagesCheck;
-    private javax.swing.JCheckBox ignoreSelectCheck;
-    private javax.swing.JCheckBox removeEmptyTablesCheck;
-    private javax.swing.JCheckBox displayImageLinkAltsCheck;
-    private javax.swing.JCheckBox ignoreAdsCheck;
-    private javax.swing.JCheckBox displayAltTagsCheck;
-    private javax.swing.JCheckBox ignoreEmbedCheck;
-    private javax.swing.JCheckBox ignoreTableCellWidthsCheck;
-    private javax.swing.JCheckBox ignoreStyleInDivCheck;
-    private javax.swing.JPanel textOutputPanel;
-    private javax.swing.JCheckBox ignoreStylesCheck;
-    private javax.swing.JCheckBox substanceFormCheck;
-    private javax.swing.JCheckBox ignoreIframeCheck;
-    private javax.swing.JPanel outputFormatPanel;
-    private javax.swing.JCheckBox ignoreFormsCheck;
-    private javax.swing.JCheckBox appendLinksCheck;
-    private javax.swing.JTextField linkTextRatio;
-    private javax.swing.JPanel outputPanel;
-    private javax.swing.JPanel advancedPanel;
-    // End of variables declaration//GEN-END:variables
-
+	// The ContentExtractor whose settings this editor loads and commits.
+	ContentExtractor mFilter;
+
+	// Constant tab title; this is the value returned by getTabName().
+	private static final String TAB_NAME = "Extractor Settings";
+
+	/**
+	 * Builds the settings form and fills it in from the given extractor.
+	 *
+	 * @param iFilter
+	 *            the ContentExtractor whose settings are shown and edited.
+	 */
+	public SettingsEditor(ContentExtractor iFilter) {
+		this.initComponents(); // create and lay out the widgets first
+		this.mFilter = iFilter;
+		this.loadSettings(); // then sync the widgets with iFilter's settings
+	}
+
+	/**
+	 * This method is called from within the constructor to initialize the
+	 * form. WARNING: Do NOT modify this code. The content of this method is
+	 * always regenerated by the Form Editor.
+	 */
+	private void initComponents() { //GEN-BEGIN:initComponents
+		java.awt.GridBagConstraints gridBagConstraints;
+
+		buttonGroupOutput = new javax.swing.ButtonGroup();
+		settingsTabs = new javax.swing.JTabbedPane();
+		ignorePanel = new javax.swing.JPanel();
+		ignoreAdsCheck = new javax.swing.JCheckBox();
+		ignoreScriptsCheck = new javax.swing.JCheckBox();
+		ignoreNoscriptCheck = new javax.swing.JCheckBox();
+		ignoreStylesCheck = new javax.swing.JCheckBox();
+		ignoreStyleInDivCheck = new javax.swing.JCheckBox();
+		ignoreImagesCheck = new javax.swing.JCheckBox();
+		ignoreTextLinksCheck = new javax.swing.JCheckBox();
+		ignoreImageLinksCheck = new javax.swing.JCheckBox();
+		ignoreFormsCheck = new javax.swing.JCheckBox();
+		ignoreMetaCheck = new javax.swing.JCheckBox();
+		ignoreInputCheck = new javax.swing.JCheckBox();
+		ignoreButtonCheck = new javax.swing.JCheckBox();
+		ignoreSelectCheck = new javax.swing.JCheckBox();
+		ignoreTableCellWidthsCheck = new javax.swing.JCheckBox();
+		ignoreIframeCheck = new javax.swing.JCheckBox();
+		displayAltTagsCheck = new javax.swing.JCheckBox();
+		displayImageLinkAltsCheck = new javax.swing.JCheckBox();
+		ignoreEmbedCheck = new javax.swing.JCheckBox();
+		advancedPanel = new javax.swing.JPanel();
+		advancedLayoutPanel = new javax.swing.JPanel();
+		ignoreLinkListsCheck = new javax.swing.JCheckBox();
+		ignoreLLTextLinksCheck = new javax.swing.JCheckBox();
+		ignoreOnlyTextAndLinksCheck = new javax.swing.JCheckBox();
+		linkTextRatioLabel = new javax.swing.JLabel();
+		linkTextRatio = new javax.swing.JTextField();
+		ignoreLLImageLinksCheck = new javax.swing.JCheckBox();
+		removeEmptyTablesCheck = new javax.swing.JCheckBox();
+		substanceLabel = new javax.swing.JLabel();
+		substanceImageCheck = new javax.swing.JCheckBox();
+		substanceLinksCheck = new javax.swing.JCheckBox();
+		substanceInputCheck = new javax.swing.JCheckBox();
+		substanceSelectCheck = new javax.swing.JCheckBox();
+		minimumTextLengthLabel = new javax.swing.JLabel();
+		minimumTextLengthField = new javax.swing.JTextField();
+		substanceIFrameCheck = new javax.swing.JCheckBox();
+		substanceFormCheck = new javax.swing.JCheckBox();
+		substanceButtonCheck = new javax.swing.JCheckBox();
+		substanceTextareaCheck = new javax.swing.JCheckBox();
+		jSeparator1 = new javax.swing.JSeparator();
+		outputPanel = new javax.swing.JPanel();
+		outputFormatPanel = new javax.swing.JPanel();
+		htmlOutputButton = new javax.swing.JRadioButton();
+		textOutputButton = new javax.swing.JRadioButton();
+		htmlOutputPanel = new javax.swing.JPanel();
+		appendLinksCheck = new javax.swing.JCheckBox();
+		textOutputPanel = new javax.swing.JPanel();
+		limitLineBreaksCheck = new javax.swing.JCheckBox();
+		maxLineBreaksLabel = new javax.swing.JLabel();
+		maxLineBreaksField = new javax.swing.JTextField();
+
+		setLayout(new java.awt.BorderLayout());
+
+		setMaximumSize(new java.awt.Dimension(445, 392));
+		settingsTabs.setTabPlacement(javax.swing.JTabbedPane.BOTTOM);
+		ignorePanel.setLayout(new java.awt.GridBagLayout());
+
+		ignoreAdsCheck.setText("Ignore All Advertisements");
+		ignoreAdsCheck.setAlignmentY(0.0F);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 0;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.ipadx = 9;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreAdsCheck, gridBagConstraints);
+
+		ignoreScriptsCheck.setText("Ignore Scripts");
+		ignoreScriptsCheck.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 1;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.ipadx = 55;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreScriptsCheck, gridBagConstraints);
+
+		ignoreNoscriptCheck.setText("Enable <NOSCIPT> tags");
+		ignoreNoscriptCheck.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 4;
+		gridBagConstraints.gridy = 1;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.ipadx = 11;
+		ignorePanel.add(ignoreNoscriptCheck, gridBagConstraints);
+
+		ignoreStylesCheck.setText("Ignore Styles");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 2;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreStylesCheck, gridBagConstraints);
+
+		ignoreStyleInDivCheck.setText("Ignore Style Attribute in <DIV> tags");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 3;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreStyleInDivCheck, gridBagConstraints);
+
+		ignoreImagesCheck.setText("Ignore Non-Link Images");
+		ignoreImagesCheck.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 4;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreImagesCheck, gridBagConstraints);
+
+		ignoreTextLinksCheck.setText("Ignore Text Links");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 6;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreTextLinksCheck, gridBagConstraints);
+
+		ignoreImageLinksCheck.setText("Ignore Image Links");
+		ignoreImageLinksCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 5;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreImageLinksCheck, gridBagConstraints);
+
+		ignoreFormsCheck.setText("Ignore Forms");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 12;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreFormsCheck, gridBagConstraints);
+
+		ignoreMetaCheck.setText("Ignore <META> tags");
+		ignoreMetaCheck.addActionListener(new java.awt.event.ActionListener() {
+			public void actionPerformed(java.awt.event.ActionEvent evt) {
+				ignoreMetaCheckActionPerformed(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 16;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreMetaCheck, gridBagConstraints);
+
+		ignoreInputCheck.setText("Ignore <INPUT> tags");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 13;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreInputCheck, gridBagConstraints);
+
+		ignoreButtonCheck.setText("Ignore <BUTTON> tags");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 14;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreButtonCheck, gridBagConstraints);
+
+		ignoreSelectCheck.setText("Ignore <SELECT> tags");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 15;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreSelectCheck, gridBagConstraints);
+
+		ignoreTableCellWidthsCheck.setText("Ignore Table Cell Widths");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 18;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreTableCellWidthsCheck, gridBagConstraints);
+
+		ignoreIframeCheck.setText("Ignore <IFRAME> tags");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 17;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreIframeCheck, gridBagConstraints);
+
+		displayAltTagsCheck.setText("Display ALT Links");
+		displayAltTagsCheck.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 4;
+		gridBagConstraints.gridy = 4;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		ignorePanel.add(displayAltTagsCheck, gridBagConstraints);
+
+		displayImageLinkAltsCheck.setText("Display ALT Links");
+		displayImageLinkAltsCheck.setEnabled(false);
+		displayImageLinkAltsCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 4;
+		gridBagConstraints.gridy = 5;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		ignorePanel.add(displayImageLinkAltsCheck, gridBagConstraints);
+
+		ignoreEmbedCheck.setText("Ignore <EMBED> tags");
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 19;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		ignorePanel.add(ignoreEmbedCheck, gridBagConstraints);
+
+		settingsTabs.addTab("Ignore Settings", ignorePanel);
+
+		advancedLayoutPanel.setLayout(new java.awt.GridBagLayout());
+
+		ignoreLinkListsCheck.setText("Ignore Link Lists");
+		ignoreLinkListsCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 0;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		advancedLayoutPanel.add(ignoreLinkListsCheck, gridBagConstraints);
+
+		ignoreLLTextLinksCheck.setText("Text Links");
+		ignoreLLTextLinksCheck.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 1;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(ignoreLLTextLinksCheck, gridBagConstraints);
+
+		ignoreOnlyTextAndLinksCheck.setText("Ignore Only Text and Links");
+		ignoreOnlyTextAndLinksCheck.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 3;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(
+			ignoreOnlyTextAndLinksCheck,
+			gridBagConstraints);
+
+		linkTextRatioLabel.setText("Link/Text Removal Ratio");
+		linkTextRatioLabel.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 4;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 10);
+		advancedLayoutPanel.add(linkTextRatioLabel, gridBagConstraints);
+
+		linkTextRatio.setText("0.25");
+		linkTextRatio.setPreferredSize(new java.awt.Dimension(55, 20));
+		linkTextRatio.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 1;
+		gridBagConstraints.gridy = 4;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.ipadx = 50;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 6);
+		advancedLayoutPanel.add(linkTextRatio, gridBagConstraints);
+
+		ignoreLLImageLinksCheck.setText("Image Links");
+		ignoreLLImageLinksCheck.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 2;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(ignoreLLImageLinksCheck, gridBagConstraints);
+
+		removeEmptyTablesCheck.setText("Remove Empty Tables");
+		removeEmptyTablesCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 6;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0);
+		advancedLayoutPanel.add(removeEmptyTablesCheck, gridBagConstraints);
+
+		substanceLabel.setText("Tags to Consider as Substance:");
+		substanceLabel.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 7;
+		gridBagConstraints.gridwidth = 3;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(substanceLabel, gridBagConstraints);
+
+		substanceImageCheck.setText("<IMG>");
+		substanceImageCheck.setEnabled(false);
+		substanceImageCheck.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 8;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(substanceImageCheck, gridBagConstraints);
+
+		substanceLinksCheck.setText("<A>");
+		substanceLinksCheck.setEnabled(false);
+		substanceLinksCheck.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 9;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(substanceLinksCheck, gridBagConstraints);
+
+		substanceInputCheck.setText("<INPUT>");
+		substanceInputCheck.setEnabled(false);
+		substanceInputCheck.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 10;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(substanceInputCheck, gridBagConstraints);
+
+		substanceSelectCheck.setText("<SELECT>");
+		substanceSelectCheck.setEnabled(false);
+		substanceSelectCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 11;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 0);
+		advancedLayoutPanel.add(substanceSelectCheck, gridBagConstraints);
+
+		minimumTextLengthLabel.setHorizontalAlignment(
+			javax.swing.SwingConstants.RIGHT);
+		minimumTextLengthLabel.setText("Minimum Text Length");
+		minimumTextLengthLabel.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 12;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.EAST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 20, 0, 10);
+		advancedLayoutPanel.add(minimumTextLengthLabel, gridBagConstraints);
+
+		minimumTextLengthField.setText("1");
+		minimumTextLengthField.setEnabled(false);
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 1;
+		gridBagConstraints.gridy = 12;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		gridBagConstraints.ipadx = 50;
+		gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
+		gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 6);
+		advancedLayoutPanel.add(minimumTextLengthField, gridBagConstraints);
+
+		substanceIFrameCheck.setText("<IFRAME>");
+		substanceIFrameCheck.setEnabled(false);
+		substanceIFrameCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 1;
+		gridBagConstraints.gridy = 11;
+		gridBagConstraints.gridwidth = 2;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		advancedLayoutPanel.add(substanceIFrameCheck, gridBagConstraints);
+
+		substanceFormCheck.setText("<FORM>");
+		substanceFormCheck.setEnabled(false);
+		substanceFormCheck.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 1;
+		gridBagConstraints.gridy = 10;
+		gridBagConstraints.gridwidth = 2;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		advancedLayoutPanel.add(substanceFormCheck, gridBagConstraints);
+
+		substanceButtonCheck.setText("<BUTTON>");
+		substanceButtonCheck.setEnabled(false);
+		substanceButtonCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 1;
+		gridBagConstraints.gridy = 9;
+		gridBagConstraints.gridwidth = 2;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		advancedLayoutPanel.add(substanceButtonCheck, gridBagConstraints);
+
+		substanceTextareaCheck.setText("<TEXTAREA>");
+		substanceTextareaCheck.setEnabled(false);
+		substanceTextareaCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 1;
+		gridBagConstraints.gridy = 8;
+		gridBagConstraints.gridwidth = 2;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
+		advancedLayoutPanel.add(substanceTextareaCheck, gridBagConstraints);
+
+		gridBagConstraints = new java.awt.GridBagConstraints();
+		gridBagConstraints.gridx = 0;
+		gridBagConstraints.gridy = 5;
+		gridBagConstraints.gridwidth = java.awt.GridBagConstraints.REMAINDER;
+		gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
+		gridBagConstraints.ipadx = 430;
+		gridBagConstraints.insets = new java.awt.Insets(6, 0, 0, 0);
+		advancedLayoutPanel.add(jSeparator1, gridBagConstraints);
+
+		advancedPanel.add(advancedLayoutPanel);
+
+		settingsTabs.addTab("Advanced Settings", null, advancedPanel, "null");
+
+		outputPanel.setLayout(
+			new javax.swing.BoxLayout(
+				outputPanel,
+				javax.swing.BoxLayout.Y_AXIS));
+
+		outputFormatPanel.setLayout(
+			new javax.swing.BoxLayout(
+				outputFormatPanel,
+				javax.swing.BoxLayout.Y_AXIS));
+
+		outputFormatPanel.setBorder(
+			new javax.swing.border.TitledBorder("Output Format"));
+		outputFormatPanel.setAlignmentX(0.0F);
+		htmlOutputButton.setSelected(true);
+		htmlOutputButton.setText("HTML only");
+		buttonGroupOutput.add(htmlOutputButton);
+		htmlOutputButton.setHorizontalAlignment(
+			javax.swing.SwingConstants.CENTER);
+		htmlOutputButton.setMaximumSize(new java.awt.Dimension(10000, 24));
+		outputFormatPanel.add(htmlOutputButton);
+
+		textOutputButton.setText("Text only");
+		buttonGroupOutput.add(textOutputButton);
+		textOutputButton.setHorizontalAlignment(
+			javax.swing.SwingConstants.CENTER);
+		textOutputButton.setMaximumSize(new java.awt.Dimension(10000, 24));
+		outputFormatPanel.add(textOutputButton);
+
+		outputPanel.add(outputFormatPanel);
+
+		htmlOutputPanel.setLayout(
+			new java.awt.FlowLayout(java.awt.FlowLayout.LEFT));
+
+		htmlOutputPanel.setBorder(
+			new javax.swing.border.TitledBorder("HTML Output Settings"));
+		htmlOutputPanel.setAlignmentX(0.0F);
+		htmlOutputPanel.setMinimumSize(new java.awt.Dimension(100, 59));
+		htmlOutputPanel.setPreferredSize(new java.awt.Dimension(100, 59));
+		appendLinksCheck.setText("Append Links to Bottom of Page");
+		htmlOutputPanel.add(appendLinksCheck);
+
+		outputPanel.add(htmlOutputPanel);
+
+		textOutputPanel.setLayout(null);
+
+		textOutputPanel.setBorder(
+			new javax.swing.border.TitledBorder("Text Output Settings"));
+		textOutputPanel.setAlignmentX(0.0F);
+		textOutputPanel.setPreferredSize(new java.awt.Dimension(0, 240));
+		limitLineBreaksCheck.setText("Limit number of line breaks");
+		limitLineBreaksCheck
+			.addItemListener(new java.awt.event.ItemListener() {
+			public void itemStateChanged(java.awt.event.ItemEvent evt) {
+				selectionEnabler(evt);
+			}
+		});
+
+		textOutputPanel.add(limitLineBreaksCheck);
+		limitLineBreaksCheck.setBounds(10, 25, 180, 24);
+
+		maxLineBreaksLabel.setText("Maximum number of line breaks");
+		textOutputPanel.add(maxLineBreaksLabel);
+		maxLineBreaksLabel.setBounds(10, 50, 183, 16);
+
+		maxLineBreaksField.setColumns(5);
+		maxLineBreaksField.setText("2");
+		textOutputPanel.add(maxLineBreaksField);
+		maxLineBreaksField.setBounds(200, 50, 55, 20);
+
+		outputPanel.add(textOutputPanel);
+
+		settingsTabs.addTab("Output Settings", outputPanel);
+
+		add(settingsTabs, java.awt.BorderLayout.WEST);
+
+	} //GEN-END:initComponents
+
+	/**
+	 * Action handler wired to the "Ignore &lt;META&gt; tags" check box in
+	 * initComponents(). The body contains no statements, so activating the
+	 * check box currently triggers no additional behaviour here.
+	 *
+	 * @param evt
+	 *            the action event delivered by the check box (unused)
+	 */
+	private void ignoreMetaCheckActionPerformed(
+		java.awt.event.ActionEvent evt) {
+		//GEN-FIRST:event_ignoreMetaCheckActionPerformed
+		// Intentionally empty: generated stub, no handling code has been added.
+	} //GEN-LAST:event_ignoreMetaCheckActionPerformed
+
+	private void selectionEnabler(
+		java.awt.event.ItemEvent evt) { //GEN-FIRST:event_selectionEnabler
+		// Check out what is the source
+		Object source = evt.getItemSelectable();
+
+		if (source == ignoreScriptsCheck) {
+			if (evt.getStateChange() == ItemEvent.DESELECTED)
+				ignoreNoscriptCheck.setEnabled(false);
+			else
+				ignoreNoscriptCheck.setEnabled(true);
+		} //if
+		else if (source == ignoreLinkListsCheck) {
+			if (evt.getStateChange() == ItemEvent.DESELECTED) {
+				ignoreLLTextLinksCheck.setEnabled(false);
+				ignoreLLImageLinksCheck.setEnabled(false);
+				ignoreOnlyTextAndLinksCheck.setEnabled(false);
+				linkTextRatio.setEnabled(false);
+				linkTextRatioLabel.setEnabled(false);
+			} //if
+			else {
+				ignoreLLTextLinksCheck.setEnabled(true);
+				ignoreLLImageLinksCheck.setEnabled(true);
+				ignoreOnlyTextAndLinksCheck.setEnabled(true);
+				linkTextRatio.setEnabled(true);
+				linkTextRatioLabel.setEnabled(true);
+			} //else
+		} //else if
+		else if (source == ignoreImagesCheck) {
+			if (evt.getStateChange() == ItemEvent.DESELECTED) {
+				displayAltTagsCheck.setEnabled(false);
+			} //if
+			else {
+				displayAltTagsCheck.setEnabled(true);
+			}
+		} else if (source == ignoreImageLinksCheck) {
+			if (evt.getStateChange() == ItemEvent.DESELECTED)
+				displayImageLinkAltsCheck.setEnabled(false);
+			else
+				displayImageLinkAltsCheck.setEnabled(true);
+		} else if (source == removeEmptyTablesCheck) {
+			if (evt.getStateChange() == ItemEvent.DESELECTED) {
+				substanceImageCheck.setEnabled(false);
+				substanceLinksCheck.setEnabled(false);
+				substanceInputCheck.setEnabled(false);
+				substanceSelectCheck.setEnabled(false);
+				substanceTextareaCheck.setEnabled(false);
+				substanceButtonCheck.setEnabled(false);
+				substanceFormCheck.setEnabled(false);
+				substanceIFrameCheck.setEnabled(false);
+				substanceLabel.setEnabled(false);
+				minimumTextLengthLabel.setEnabled(false);
+				minimumTextLengthField.setEnabled(false);
+			} //if
+			else {
+				substanceImageCheck.setEnabled(true);
+				substanceLinksCheck.setEnabled(true);
+				substanceInputCheck.setEnabled(true);
+				substanceSelectCheck.setEnabled(true);
+				substanceTextareaCheck.setEnabled(true);
+				substanceButtonCheck.setEnabled(true);
+				substanceFormCheck.setEnabled(true);
+				substanceIFrameCheck.setEnabled(true);
+				substanceLabel.setEnabled(true);
+				minimumTextLengthLabel.setEnabled(true);
+				minimumTextLengthField.setEnabled(true);
+			} //else
+		} //else if
+		else if (source == limitLineBreaksCheck) {
+			if (evt.getStateChange() == ItemEvent.DESELECTED) {
+				maxLineBreaksLabel.setEnabled(false);
+				maxLineBreaksField.setEnabled(false);
+			} else {
+				maxLineBreaksLabel.setEnabled(true);
+				maxLineBreaksField.setEnabled(true);
+			} //if
+		}
+	} //GEN-LAST:event_selectionEnabler
+
+	/**
+	 * Rebinds this settings GUI to the given ContentExtractor and refreshes
+	 * the controls from it by calling {@link #loadSettings()}.
+	 *
+	 * @param iFilter
+	 *            the ContentExtractor whose settings should be displayed; it
+	 *            is stored in mFilter before the settings are loaded
+	 */
+	public void reload(ContentExtractor iFilter) {
+		mFilter = iFilter;
+		loadSettings();
+	}
+
+	/**
+	 * Synchronises every control of this GUI with the values currently held
+	 * by the ContentExtractor in mFilter, reading them through getSetting().
+	 * <p>
+	 * Each check box is set to selected when its setting equals "true" and to
+	 * deselected otherwise (including when the setting is missing), so the
+	 * method can be called repeatedly, e.g. from reload(), and always ends in
+	 * the same state. The three text fields are filled with the raw setting
+	 * values.
+	 */
+	public void loadSettings() {
+		// setSelected(boolean) is used throughout instead of doClick():
+		// doClick() toggles, which would flip an already-selected box back off
+		// on a second load. setSelected() still notifies the registered
+		// ItemListeners whenever the state actually changes, so
+		// selectionEnabler() keeps the dependent controls enabled/disabled.
+		// "true".equals(...) is used so a missing setting counts as false
+		// instead of raising a NullPointerException.
+
+		// The two output buttons share a button group; select whichever one
+		// matches the setting so a reload can also switch back to HTML.
+		if ("true".equals(mFilter.getSetting(ContentExtractor.ONLY_TEXT)))
+			textOutputButton.setSelected(true);
+		else
+			htmlOutputButton.setSelected(true);
+
+		ignoreAdsCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.IGNORE_ADS)));
+		ignoreButtonCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_BUTTON_TAGS)));
+		ignoreFormsCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.IGNORE_FORMS)));
+		ignoreIframeCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_IFRAME_TAGS)));
+
+		// Governing box first, then the option it enables
+		ignoreImageLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_IMAGE_LINKS)));
+		displayImageLinkAltsCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.DISPLAY_IMAGE_LINK_ALTS)));
+
+		ignoreImagesCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.IGNORE_IMAGES)));
+		ignoreInputCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_INPUT_TAGS)));
+
+		ignoreLinkListsCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_LINK_CELLS)));
+		ignoreLLImageLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.LC_IGNORE_IMAGE_LINKS)));
+		ignoreLLTextLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.LC_IGNORE_TEXT_LINKS)));
+
+		ignoreMetaCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.IGNORE_META)));
+
+		ignoreScriptsCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.IGNORE_SCRIPTS)));
+		ignoreNoscriptCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_NOSCRIPT_TAGS)));
+
+		ignoreSelectCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_SELECT_TAGS)));
+		ignoreStyleInDivCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_DIV_STYLES)));
+		ignoreStylesCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.IGNORE_STYLES)));
+		ignoreTableCellWidthsCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_CELL_WIDTH)));
+		ignoreTextLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_TEXT_LINKS)));
+		linkTextRatio.setText(
+			mFilter.getSetting(ContentExtractor.LINK_TEXT_REMOVAL_RATIO));
+		displayAltTagsCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.DISPLAY_IMAGE_ALTS)));
+		ignoreOnlyTextAndLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.LC_ONLY_LINKS_AND_TEXT)));
+
+		removeEmptyTablesCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.REMOVE_EMPTY_TABLES)));
+		substanceButtonCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_BUTTON)));
+		substanceFormCheck.setSelected(
+			"true".equals(mFilter.getSetting(ContentExtractor.SUBSTANCE_FORM)));
+		substanceIFrameCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_IFRAME)));
+		substanceImageCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_IMAGE)));
+		substanceInputCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_INPUT)));
+		substanceLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_LINKS)));
+		substanceSelectCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_SELECT)));
+		substanceTextareaCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.SUBSTANCE_TEXTAREA)));
+		minimumTextLengthField.setText(
+			mFilter.getSetting(ContentExtractor.SUBSTANCE_MIN_TEXT_LENGTH));
+
+		appendLinksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.ADD_LINKS_TO_BOTTOM)));
+		ignoreEmbedCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.IGNORE_EMBED_TAGS)));
+
+		limitLineBreaksCheck.setSelected(
+			"true".equals(
+				mFilter.getSetting(ContentExtractor.LIMIT_LINEBREAKS)));
+		maxLineBreaksField.setText(
+			mFilter.getSetting(ContentExtractor.MAX_LINEBREAKS));
+	}
+
+	/**
+	 * Commits the settings so that the ContentExtractor reflects the user's
+	 * specifications.
+	 */
+	public void commitSettings() {
+		mFilter.changeSetting(
+			ContentExtractor.ONLY_TEXT,
+			Boolean.toString(textOutputButton.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_ADS,
+			Boolean.toString(ignoreAdsCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_BUTTON_TAGS,
+			Boolean.toString(ignoreButtonCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_FORMS,
+			Boolean.toString(ignoreFormsCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_IFRAME_TAGS,
+			Boolean.toString(ignoreIframeCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_IMAGE_LINKS,
+			Boolean.toString(ignoreImageLinksCheck.isSelected()));
+		if (ignoreImageLinksCheck.isSelected())
+			mFilter.changeSetting(
+				ContentExtractor.DISPLAY_IMAGE_LINK_ALTS,
+				Boolean.toString(displayImageLinkAltsCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_TEXT_LINKS,
+			Boolean.toString(ignoreTextLinksCheck.isSelected()));
+
+		if (ignoreImagesCheck.isSelected()) {
+			mFilter.changeSetting(
+				ContentExtractor.DISPLAY_IMAGE_ALTS,
+				Boolean.toString(displayAltTagsCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.IGNORE_IMAGES,
+				Boolean.toString(ignoreImagesCheck.isSelected()));
+		} else
+			mFilter.changeSetting(
+				ContentExtractor.IGNORE_IMAGES,
+				Boolean.toString(ignoreImagesCheck.isSelected()));
+
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_INPUT_TAGS,
+			Boolean.toString(ignoreInputCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_LINK_CELLS,
+			Boolean.toString(ignoreLinkListsCheck.isSelected()));
+
+		if (ignoreLinkListsCheck.isSelected()) {
+			mFilter.changeSetting(
+				ContentExtractor.LC_IGNORE_IMAGE_LINKS,
+				Boolean.toString(ignoreLLImageLinksCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.LC_IGNORE_TEXT_LINKS,
+				Boolean.toString(ignoreLLTextLinksCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.LINK_TEXT_REMOVAL_RATIO,
+				linkTextRatio.getText());
+			mFilter.changeSetting(
+				ContentExtractor.LC_ONLY_LINKS_AND_TEXT,
+				Boolean.toString(ignoreOnlyTextAndLinksCheck.isSelected()));
+		}
+
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_META,
+			Boolean.toString(ignoreMetaCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_SCRIPTS,
+			Boolean.toString(ignoreScriptsCheck.isSelected()));
+
+		if (ignoreNoscriptCheck.isSelected())
+			mFilter.changeSetting(
+				ContentExtractor.IGNORE_NOSCRIPT_TAGS,
+				Boolean.toString(ignoreNoscriptCheck.isSelected()));
+
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_SELECT_TAGS,
+			Boolean.toString(ignoreSelectCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_DIV_STYLES,
+			Boolean.toString(ignoreStyleInDivCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_STYLES,
+			Boolean.toString(ignoreStylesCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_CELL_WIDTH,
+			Boolean.toString(ignoreTableCellWidthsCheck.isSelected()));
+
+		if (removeEmptyTablesCheck.isSelected()) {
+			mFilter.changeSetting(
+				ContentExtractor.REMOVE_EMPTY_TABLES,
+				Boolean.toString(removeEmptyTablesCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_BUTTON,
+				Boolean.toString(substanceButtonCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_FORM,
+				Boolean.toString(substanceFormCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_IFRAME,
+				Boolean.toString(substanceIFrameCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_IMAGE,
+				Boolean.toString(substanceImageCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_INPUT,
+				Boolean.toString(substanceInputCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_LINKS,
+				Boolean.toString(substanceLinksCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_SELECT,
+				Boolean.toString(substanceSelectCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_TEXTAREA,
+				Boolean.toString(substanceTextareaCheck.isSelected()));
+			mFilter.changeSetting(
+				ContentExtractor.SUBSTANCE_MIN_TEXT_LENGTH,
+				minimumTextLengthField.getText());
+		} //if
+		else
+			mFilter.changeSetting(
+				ContentExtractor.REMOVE_EMPTY_TABLES,
+				Boolean.toString(removeEmptyTablesCheck.isSelected()));
+
+		mFilter.changeSetting(
+			ContentExtractor.IGNORE_EMBED_TAGS,
+			Boolean.toString(ignoreEmbedCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.ADD_LINKS_TO_BOTTOM,
+			Boolean.toString(appendLinksCheck.isSelected()));
+		mFilter.changeSetting(
+			ContentExtractor.LIMIT_LINEBREAKS,
+			Boolean.toString(limitLineBreaksCheck.isSelected()));
+		if (limitLineBreaksCheck.isSelected())
+			mFilter.changeSetting(
+				ContentExtractor.MAX_LINEBREAKS,
+				maxLineBreaksField.getText());
+
+		mFilter.saveSettings();
+	}
+
+	/**
+	 * Returns the name of this settings tab.
+	 *
+	 * @return the value of the TAB_NAME constant; presumably the caption
+	 *         shown for this panel in the settings editor (confirm against
+	 *         the caller)
+	 */
+	public String getTabName() {
+		return TAB_NAME;
+	}
+
+	/**
+	 * Rebuilds this panel from scratch. Every child component is removed,
+	 * initComponents() is called to construct the widgets again,
+	 * loadSettings() is called afterwards (presumably to refill the widgets
+	 * from the stored settings -- confirm against that method), and finally
+	 * the panel is revalidated.
+	 */
+	public void revertSettings() {
+		// Drop the current widget tree before a new one is built.
+		removeAll();
+
+		// Recreate the widgets first, then populate them.
+		initComponents();
+		loadSettings();
+
+		// The children were replaced, so request a fresh layout pass.
+		revalidate();
+	}
+
+	// Swing component fields used by this settings panel. The GEN-BEGIN /
+	// GEN-END markers below appear to be form-editor guards (NetBeans style):
+	// the declarations between them are marked "do not modify", so change
+	// them through the form designer rather than by hand.
+	// Variables declaration - do not modify//GEN-BEGIN:variables
+	private javax.swing.JCheckBox ignoreLinkListsCheck;
+	private javax.swing.JCheckBox substanceImageCheck;
+	private javax.swing.JPanel advancedLayoutPanel;
+	private javax.swing.JSeparator jSeparator1;
+	private javax.swing.JTextField maxLineBreaksField;
+	private javax.swing.JCheckBox substanceInputCheck;
+	private javax.swing.JCheckBox ignoreNoscriptCheck;
+	private javax.swing.JLabel maxLineBreaksLabel;
+	private javax.swing.JCheckBox substanceButtonCheck;
+	private javax.swing.JLabel substanceLabel;
+	private javax.swing.JRadioButton textOutputButton;
+	private javax.swing.JCheckBox substanceLinksCheck;
+	private javax.swing.JCheckBox substanceTextareaCheck;
+	private javax.swing.JRadioButton htmlOutputButton;
+	private javax.swing.JCheckBox substanceSelectCheck;
+	private javax.swing.JCheckBox ignoreOnlyTextAndLinksCheck;
+	private javax.swing.JTextField minimumTextLengthField;
+	private javax.swing.JCheckBox ignoreImageLinksCheck;
+	private javax.swing.JCheckBox ignoreLLImageLinksCheck;
+	private javax.swing.JPanel htmlOutputPanel;
+	private javax.swing.JCheckBox substanceIFrameCheck;
+	private javax.swing.JLabel minimumTextLengthLabel;
+	private javax.swing.JPanel ignorePanel;
+	private javax.swing.JCheckBox ignoreScriptsCheck;
+	private javax.swing.JCheckBox ignoreMetaCheck;
+	private javax.swing.JCheckBox ignoreInputCheck;
+	private javax.swing.JLabel linkTextRatioLabel;
+	private javax.swing.ButtonGroup buttonGroupOutput;
+	private javax.swing.JCheckBox ignoreTextLinksCheck;
+	private javax.swing.JCheckBox ignoreLLTextLinksCheck;
+	private javax.swing.JCheckBox limitLineBreaksCheck;
+	private javax.swing.JCheckBox ignoreButtonCheck;
+	private javax.swing.JTabbedPane settingsTabs;
+	private javax.swing.JCheckBox ignoreImagesCheck;
+	private javax.swing.JCheckBox ignoreSelectCheck;
+	private javax.swing.JCheckBox removeEmptyTablesCheck;
+	private javax.swing.JCheckBox displayImageLinkAltsCheck;
+	private javax.swing.JCheckBox ignoreAdsCheck;
+	private javax.swing.JCheckBox displayAltTagsCheck;
+	private javax.swing.JCheckBox ignoreEmbedCheck;
+	private javax.swing.JCheckBox ignoreTableCellWidthsCheck;
+	private javax.swing.JCheckBox ignoreStyleInDivCheck;
+	private javax.swing.JPanel textOutputPanel;
+	private javax.swing.JCheckBox ignoreStylesCheck;
+	private javax.swing.JCheckBox substanceFormCheck;
+	private javax.swing.JCheckBox ignoreIframeCheck;
+	private javax.swing.JPanel outputFormatPanel;
+	private javax.swing.JCheckBox ignoreFormsCheck;
+	private javax.swing.JCheckBox appendLinksCheck;
+	private javax.swing.JTextField linkTextRatio;
+	private javax.swing.JPanel outputPanel;
+	private javax.swing.JPanel advancedPanel;
+	// End of variables declaration//GEN-END:variables
+
 }