/* * [CollectBornsInBookMacros.java] * * Summary: Find Author/Birth/Death in Book Macros. * * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2011-10-18 initial version */ package com.mindprod.repair; import com.mindprod.entities.DeEntifyStrings; import java.io.File; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Find Author/Birth/Death in Book Macros. *

* Does not change the html files. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2011-10-18 initial version * @since 2011-10-18 */ class CollectBornsInBookMacros { /** * pattern of quote already handled, used to extract author in {} */ private static final Pattern AUTHOR_PATTERN_IN_BOOK_MACRO_IN_BRACES = Pattern.compile( "author=\\{(.+?)\\}", Pattern.DOTALL ); /** * pattern of quote already handled, used to extract author in " " */ private static final Pattern AUTHOR_PATTERN_IN_BOOK_MACRO_IN_QUOTES = Pattern.compile( "author=\"(.+?)\"", Pattern.DOTALL ); /** * pattern birth="yyyy-mm-dd" */ private static final Pattern BIRTH_PATTERN_IN_BOOK_MACRO = Pattern.compile( "birth=\"(.+?)\"" ); /** * look for */ private static final Pattern BOOK_MACRO_PATTERN = Pattern.compile( "", Pattern.DOTALL ); /** * death=yyyy-mm-dd */ private static final Pattern DEATH_PATTERN_IN_BOOK_MACRO = Pattern.compile( "death=\"(.+?)\"" ); /** * pattern of quote already handled, used to extract author */ private static final Pattern MULTI_AUTHOR_SPLIT = Pattern.compile( "\\s+and\\s+|\\s+with\\s+|\\s+aka\\s+|,|&|\\[|\\]" ); /** * find all BookMacros on page. Add details of authors to HashMap * * @param page the whole page * @param fileBeingProcessed file we are processing * . */ // CollectBornsInBookMacros static void collectBornsInBookMacroForPage( final String page, final File fileBeingProcessed ) { Matcher m = BOOK_MACRO_PATTERN.matcher( page ); while ( m.find() ) { collectBornsInBookMacrosForPage( m.group( 1 ), fileBeingProcessed ); } } // Stephen Harper /** * find all blockquotes on page. Give new CSS classes. Add details of authors to HashMap * * @param chunk the whole blockquote with head and teal tags trimmed off. * @param fileBeingProcessed file we are processing * . */ private static void collectBornsInBookMacrosForPage( final String chunk, final File fileBeingProcessed ) { // if it is not there, no problem. We will detect it and complain later in repair if we can't fix it. if ( chunk.contains( "birth=" ) ) { final Matcher a1 = AUTHOR_PATTERN_IN_BOOK_MACRO_IN_QUOTES.matcher( chunk ); final Matcher a2 = AUTHOR_PATTERN_IN_BOOK_MACRO_IN_BRACES.matcher( chunk ); final Matcher b = BIRTH_PATTERN_IN_BOOK_MACRO.matcher( chunk ); final Matcher d = DEATH_PATTERN_IN_BOOK_MACRO.matcher( chunk ); boolean founda1 = a1.find(); boolean founda2 = a2.find(); if ( ( founda1 || founda2 ) && b.find() ) { final Matcher a = founda1 ? a1 : a2; final String allAuthors = a.group( 1 ); final String[] individualAuthors = MULTI_AUTHOR_SPLIT.split( allAuthors ); final String author = individualAuthors.length >= 1 ? individualAuthors[ 0 ].trim() : ""; final String authorFlat = DeEntifyStrings.flattenHTML( author, ' ' ); final AuthorBio old = AuthorBio.get( authorFlat ); // we collect unknowns too. String birthdate = b.group( 1 ); // found done earlier String deathdate = d.find() ? d.group( 1 ) : ""; final AuthorBio recent = new AuthorBio( author, birthdate, deathdate, fileBeingProcessed ); if ( old == null ) { AuthorBio.put( authorFlat, recent ); } else { if ( !( old.getBirthdate().equals( birthdate ) && old.getDeathdate().equals( deathdate ) ) ) { AuthorWarning.warn( "inconsistent dates in book macro", old, recent ); } // if matches, do nothing. We already have all their is to know about the author on fil. } } } } }