/*
* [AbstractGlossCustomiser.java]
*
* Summary: base class for Classes to customise how glossary indexes are handled.
*
* Copyright: (c) 2007-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 2.2 2007-04-27 use enum for each embellishment letter. use CSS for single icons.
* associated names for each entry:
* - the file: This has to follow OS naming conventions. For
* safety we use only
* lower case \\p{Lower}.html. The other restriction is obviously that filenames must be unique.
*
* - the anchor: This has to
* follow HTML rules. For safety, we use only UPPER case A-Z. The other obvious restriction is
* that anchor names
* must be unique within a file.
* - the entry description: This has to follow no rules other
* than it will be
* sorted alphabetically including punctuation. It contains punctuation, spaces,
* upper and lower case. Our sort is
* case-insensitive.
* The other obvious restriction is that names should be unique within a glossary. Theoretically
* everything will still
* work, but it will be confusing for the end user if there are duplicate index entries pointing to
* different text.
* How are they to tell which is which?
* The main error even I sometimes make is getting confused in thinking we sort the index by filename
* or anchor name. We
* don't. We always sort by the entry description. The filenames and anchor names are all but
* irrelevant. However, using
* a name closely related to the description makes it easy to remember. These create your urls e.g.
* you might post:
* Please have a look at my website : http://mindprod.com/jgloss/regex.html#REGEX
* You want them to be easy to remember. Usually the three names use the same root e.g. "jar.html"
* "JAR" "jar" They can
* be entirely different: e.g. dotcom.html "DOTCOMCOMPANIES" ".com companies" The index we create
* contains all three
* names: as a link from the description to the file and target/anchor. e.g.
* some entry description
* The index also exists in binary form in alphabetical order in the *gloss.ser files used by the Go
* Applet for direct
* jump to an entry.
*/
package com.mindprod.qf;
import com.mindprod.common18.ST;
import com.mindprod.htmlmacros.macro.Global;
/**
* base class for Classes to customise how glossary indexes are handled.
*
* @author Roedy Green, Canadian Mind Products
* @version 2.2 2007-04-27 use enum for each embellishment letter. use CSS for single icons.
* associated names for each entry: - the file: This has to follow OS naming conventions. For safety
* we use only
* lower case \\p{Lower}.html The other restriction is obviously that filenames must be unique.
* - the anchor: This has to
* follow HTML rules. For safety, we use only UPPER case A-Z. The other obvious restriction is that
* anchor names
* must be unique within a file.
* - the entry description: This has to follow no rules other than it
* will be
* sorted alphabetically including punctuation. It contains punctuation, spaces,
* upper and lower case. Our sort is
* case-insensitive.
* The other obvious restriction is that names should be unique within a glossary. Theoretically everything
* will still
* work, but it will be confusing for the end user if there are duplicate index entries pointing to
* different text.
* How are they to tell which is which?
* The main error even I sometimes make is getting confused in thinking we sort the index by filename or
* anchor name. We
* don't. We always sort by the entry description. The filenames and anchor names are all but irrelevant.
* However, using
* a name closely related to the description makes it easy to remember. These create your urls e.g. you
* might post:
* Please have a look at my website : http://mindprod.com/jgloss/regex.html#REGEX
* You want them to be easy to remember. Usually the three names use the same root e.g. "jar.html" "JAR"
* "jar" They can
* be entirely different: e.g. dotcom.html "DOTCOMCOMPANIES" ".com companies" The index we create contains
* all three
* names: as a link from the description to the file and target/anchor. e.g.
* some entry description
* The index also exists in binary form in alphabetical order in the *gloss.ser files used by the obsolete
* Go Applet, for direct jump to an entry, and to help SeeSort repair see links without urls.
* @since 2007
*/
public abstract class AbstractGlossCustomiser
{
    /**
     * Default estimate of the number of index entries, used by {@link #getEstimatedIndexEntries()}.
     */
    private static final int DEFAULT_ESTIMATED_INDEX_ENTRIES = 5000;

    /**
     * Directory that receives the generated indexes; the embellishments live there too.
     *
     * @return directory where the output files go, WITH TRAILING /.
     */
    protected abstract String getIncludeDirName();

    /**
     * Get the bare anchor name that corresponds to a letter code.
     *
     * @param letterCode uppercase letter of the alphabet this index file will cover.
     *                   0=digits *=punct ~=master index
     *
     * @return anchor name, e.g. A B C DIGITS PUNCT INDEX
     */
    public String getAnchorForLetterCode( char letterCode )
    {
        if ( letterCode == '0' )
        {
            return "DIGITS";
        }
        if ( letterCode == '*' )
        {
            return "PUNCT";
        }
        if ( letterCode == '~' )
        {
            return "INDEX";
        }
        // ordinary letters are their own anchor
        return String.valueOf( letterCode );
    }

    /**
     * Base name of the glossary, e.g. lgloss or jgloss. Other names are built from it.
     *
     * @return name of glossary
     */
    public abstract String getBaseName();

    /**
     * Get the description used for a letter index file.
     *
     * @param letterCode letter of the alphabet this index file will cover.
     *                   0=digits *=punct ~=master index
     *
     * @return description, e.g. "A words", "number words", "punctuation words", "Master Index"
     */
    public String getDescriptionForLetterCode( char letterCode )
    {
        if ( letterCode == '*' )
        {
            return "punctuation words";
        }
        if ( letterCode == '0' )
        {
            return "number words";
        }
        if ( letterCode == '~' )
        {
            return "Master Index";
        }
        return letterCode + " words";
    }

    /**
     * Estimate of how many entries the index holds. This will be smaller than the number of
     * files, since not every file defines a term. No companion estimate for the number of files
     * is needed because QF never creates any entry-per-file tables.
     *
     * @return estimated number of index entries in this glossary
     */
    public int getEstimatedIndexEntries()
    {
        return DEFAULT_ESTIMATED_INDEX_ENTRIES;
    }

    /**
     * Get the filename, without directory, of the index file for a letter code.
     *
     * @param letterCode uppercase letter of the alphabet this index file will cover.
     *                   0=digits *=punct ~=master index. A space yields an empty name.
     *
     * @return name of file e.g. "a.html" "0-9.html" "punct.html" "masterindex.html"
     */
    public String getFilenameForLetterCode( char letterCode )
    {
        if ( letterCode == ' ' )
        {
            return "";
        }
        if ( letterCode == '*' )
        {
            return "punct.html";
        }
        if ( letterCode == '0' )
        {
            return "0-9.html";
        }
        if ( letterCode == '~' )
        {
            return "masterindex.html";
        }
        // filenames are always lower case
        final char lowerCased = Character.toLowerCase( letterCode );
        return lowerCased + ".html";
    }

    /**
     * Get the glossary enum associated with this customiser.
     *
     * @return glossary
     */
    public abstract Gloss getGlossEnum();

    /**
     * Get the directory where the input files are.
     *
     * @return directory where the input files to scan and index are, with trailing /.
     */
    public abstract String getInputDirName();

    /**
     * Convert the first letter of the word being indexed into the letter category of the index.
     *
     * @param letter first letter of the word being indexed. This has nothing to do with the name
     *               of the file the entry lives in. May be upper or lower case letter, digit or
     *               punctuation. Some punctuation will not do for matching filenames, bad:
     *               / . $ \ * + ; : " # % & ' < > ? [] {} ^ ~ _ Safe starting file name letters
     *               are \\p{Lower} -
     *
     * @return letter category: '0' for a digit, 'A'..'Z' for a letter (after upper-casing),
     *         '*' for the recognised punctuation characters, '!' for anything else.
     */
    public char getLetterCode( char letter )
    {
        final char upper = Character.toUpperCase( letter );
        if ( upper >= '0' && upper <= '9' )
        {
            return '0';
        }
        if ( upper >= 'A' && upper <= 'Z' )
        {
            return upper;
        }
        // the punctuation ranges below skip '_' , which therefore falls through to '!'
        final boolean punctuation =
                ( upper >= '!' && upper <= '/' )
                || ( upper >= ':' && upper <= '@' )
                || ( upper >= '[' && upper <= '^' )
                || upper == '`'
                || ( upper >= '{' && upper <= '~' );
        return punctuation ? '*' : '!';
    }

    /**
     * Get the wording for the link to an index page.
     *
     * @param letterCode letter of the alphabet this index file will cover. This has nothing to do
     *                   with the first letter of the filename or the anchor being indexed. It is
     *                   the letter for the chunk of the alphabet that generates one index file.
     *
     * @return short title: A B 0-9 * Master Index
     */
    public String getLinkNameForLetterCode( char letterCode )
    {
        if ( letterCode == '*' )
        {
            return "*";
        }
        if ( letterCode == '0' )
        {
            return "0-9";
        }
        if ( letterCode == '~' )
        {
            return "Master Index";
        }
        return String.valueOf( letterCode );
    }

    /**
     * Get the unqualified name of the next input file to scan for anchors/terms.
     *
     * @return unqualified filename. Watch \ which must be doubled in Java Strings. Signals that
     *         there are no more files to process either by repeating the last filename, or by
     *         returning null.
     */
    public abstract String getNextInFilename();

    /**
     * Name of the directory where serialised resources to be included in jars are put.
     *
     * @return fully qualified output directory, with trailing /.
     */
    @SuppressWarnings( { "SameReturnValue" } )
    public String getResourceDirName()
    {
        assert Global.configuration != null : "AbstractGlossCustomiser needs Global.configuration";
        // the qf subdirectory of the configured source directory
        return Global.configuration.getSourceDirWithSlashes() + "/qf/";
    }

    /**
     * Get the fully qualified name of the table file where the generated index will go. The
     * original is not overwritten just yet.
     *
     * @param letterCode uppercase letter of the alphabet this index file will cover.
     *                   0=digits *=punct ~=master index
     *
     * @return name of table file with generated index, ending in e.g. "a.guts.htmlfrag"
     *         "0-9.guts.htmlfrag" "punct.guts.htmlfrag" "masterindex.guts.htmlfrag"
     */
    public String getTablenameForLetterCode( char letterCode )
    {
        final String htmlName = getFilenameForLetterCode( letterCode );
        // swap the .html extension for .guts.htmlfrag
        final String fragmentName =
                ST.chopTrailingString( htmlName, ".html" ) + ".guts.htmlfrag";
        return getIncludeDirName() + getBaseName() + '/' + fragmentName;
    }

    /**
     * Get the title for a letter index file.
     *
     * @param letterCode upper case letter of the alphabet this index file will cover.
     *                   0=digits *=punct ~=master index
     *
     * @return title
     */
    public String getTitleForLetterCode( char letterCode )
    {
        if ( letterCode == '*' )
        {
            return "Index for punctuation";
        }
        if ( letterCode == '0' )
        {
            return "Index for the digits 0-9";
        }
        if ( letterCode == '~' )
        {
            return "Master Index";
        }
        return "Index for the letter " + letterCode;
    }

    /**
     * Do we need to produce index pages for each individual letter of the alphabet?
     *
     * @return true if individual indexes are needed. This base implementation says no.
     */
    public boolean needIndividualLetterIndexes()
    {
        return false;
    }
} // end AbstractGlossCustomiser