/* PROGRAM BlOut 2.8: converts 2 or more blank lines to 1, or removes them altogether.
 *
 * Lines of all blanks or with just CrLf count as a blank line.
 * Also trims trailing blanks. Also trims all blank lines from the beginning
 * and end of the files. We assume some ASCII-like 8-bit encoding,
 * not Unicode 16. Ok for UTF-8 since space, \r \n all have 8-bit codes.
 *
 * Compiled as a Windows console App, without precompiled headers.
 * For Visual C++ Express options see http://mindprod.com/jgloss/cpp.html#COMPILING
 *
 * usage : BlOut [-compact] Myfile.Txt
 * Version History:
 * 1.0 1993-11-09 Roedy Green in Microsoft Visual C 8
 * 1.1 1996-11-08 extra "good" extensions
 * 1.2 1997-03-10 extra "good" extensions HTM, INI, SQL
 * 1.3 2003-05-09 extra "good" extensions CSV
 * 1.4 2004-05-20 converted to Win32 app.
 *                now ensures file has terminating \n.
 * 1.5 2004-06-01 handles *.html files
 *                avoid close of unopen files.
 * 1.6 2005-07-23 add more good and bad extensions
 * 1.7 2007-07-23 add xsd, xsdfrag, dtd, dtdfrag extensions
 * 1.8 2008-01-02 convert to POSIX and safe char handling library.
 * 1.9 2008-01-03 fix time file placement, display banner on all errors
 * 2.0 2008-01-04 add pml extension as good.
 * 2.1 2009-03-11 add ANT build script
 * 2.2 2009-05-05 add pml to list of safe extensions
 * 2.3 2010-02-03 tidy files with badly damaged NLs. Use Java naming conventions. Treats lines of tabs and spaces as empty.
 * 2.4 2010-02-11 add .mftfrag, .mf, .mffrag, .sf, .sffrag, .http, .httpfrag as good extensions.
 * 2.5 2010-02-18 trim lead and trail blank lines from files. Handle up to 100 runs of leading tab/spaces on lines.
 * 2.6 2011-01-16 now allows multiple files on the command line, and the -compact option.
 * 2.7 2014-04-29 lists of extensions now do not include ., Kept in sync with Java ExtensionListFilter.
 * 2.8 2016-08-24 add more good/bad extensions.
*/ /* ==================================== Roedy Green Canadian Mind Products #101 - 2536 Wark Street Victoria, BC Canada V8T 4G8 tel:(250) 361-9093 mailto:roedyg@mindprod.com http://mindprod.com */ #include /* exit, putenv, _splitpath */ #include /* isUpper isLower toUpper */ #include /* fclose, fgetc, printf, remove, rename, setvbuf */ #include #include /* getch, putch */ #include /* P R O T O T Y P E S */ int main (int argc, char *argv[]); void banner (void); void die (void); void honk (void); void safeFilename (void); /* ==================================== */ #define esc '\x1B' /* maximum number of tabs/space runs leading a line we can handle */ #define maxDepth 100 /* use all global variables and no parameter passing for simplicity. */ FILE *before; /* input file containing no ^Z chars, except * possibly at the end */ FILE *after; /* output file with comments converted */ /* name of file we will convert */ const char *bFilename; /* pointer to string name of file */ /* name of the temporary output file */ char *aFilename; /* How many NLs in a row you are willing to tolerate. 2 = no more than one blank line, 1 = no blank lines */ int tolerate; /* ==================================== */ void safeFilename() { /* Ensure appropriate file name extensions. 
good =.ASM .PAS .etc - done without prompt bad =.EXE .COM .etc - abort warning =.DOC & others */ static const char * goodExtensions [] = { "ans", "asm", "bat", "batfrag", "btm", "btmfrag", "c", "cfrag", "cmd", "cpp", "cppfrag", "css", "cssfrag", "csv", "csvfrag", "ctl", "doc", "dtd", "dtdfrag", "e", "h", "hfrag", "hpp", "hppfrag", "htm", "htmfrag", "html", "htmlfrag", "http", "httpfrag", "ih", "iml", "ini", "ion", "java", "javafrag", "jnlp", "jnlpfrag", "jsp", "jspfrag", "list", "log", "lst", "mac", "mf", "mffrag", "mft", "mftfrag", "pas", "pml", "policy", "prn", "properties", "ps", "rh", "sf", "sffrag", "sh", "site", "sql", "sqlfrag", "svg", "tab", "text", "txt", "use", "wiki", "xml", "xmlfrag", "xsd", "xsdfrag", /* 0 is just end marker */ 0}; static const char * badExtensions [] = { "au", "7z", "blk", "bmp", "bod", "bz", "bz2", "bzip2", "class", "com", "dat", "digest", "dll", "doc", "exe", "gif", "gz", "ico", "jar", "jpeg", "jpg", "mbx", "name", "obj", "png", "p7b", "png", "rar", "seq", "ser", "so", "sym", "tbz", "tbz2", "toc", "usg", "zip", "zipx", /* 0 is just end marker */ 0}; int response; /* Y or N, yes Virginia, int, C is weird */ /* extension of the file we are about to process */ char extension[_MAX_EXT]; int i; /* local loop counter */ _splitpath_s( bFilename, NULL /* drive */, 0, NULL /* dir */, 0, NULL /* name */, 0, extension, _MAX_EXT); _strlwr_s(extension, _MAX_EXT); /* convert to lower case for compare */ for ( i=0 ; goodExtensions[i]; i++ ) { /* avoid lead . 8-bit chars */ if ( strcmp(extension+1 ,goodExtensions[i])==0 ) { /* match, it is Good */ return; } } for ( i=0 ; badExtensions[i] ; i++ ) { /* avoid lead . 8-bit chars */ if ( strcmp(extension+1,badExtensions[i])==0 ) { /* match, it is bad */ banner(); printf("Oops! 
blout.exe cannot be used on EXE COM or OBJ files " "such as %s\n", bFilename); die(); /* we are doing only one file per instance, so dying will not stop the run */ } } /* just give a warning */ printf("Warning!\n" /* new line to give room for long filename */ "blout.exe is not usually used on %s files such as [%s]\n", extension,bFilename); printf("Do you want to continue and process anyway?" " (Y)es (N)o "); while ( 1 ) { /* loop forever till user enters Y or N */ honk(); response = _getch(); /* not echoed because user might hit tab or Enter */ /* and mess up the screen */ response = toupper(response); /* toupper is a macro, so needs simple argument */ switch ( response ) { case 'Y': return; case 'N': case esc : printf("\nblout.exe aborted\n"); die(); /* others, keep looping */ } } } /* safeFilename */ /* ==================================== */ void bldAFilename(void) { /* create temp file in same directory as bFilename */ char drive[_MAX_DRIVE]; char dir[_MAX_DIR]; char name[_MAX_FNAME]; char ext[_MAX_EXT]; char filepath[_MAX_DRIVE + _MAX_DIR]; _splitpath_s(bFilename, drive, _MAX_DRIVE, dir, _MAX_DIR, name, _MAX_FNAME, ext, _MAX_EXT); strcpy_s(filepath, _MAX_DRIVE + _MAX_DIR, drive); strcat_s(filepath, _MAX_DRIVE + _MAX_DIR, dir); /* Force to current directory if empty */ if ( strcmp(filepath, "") == 0 ) { strcpy_s(filepath,_MAX_DRIVE + _MAX_DIR, "."); } else { filepath[strlen(filepath) - 1] = 0; } _putenv("TMP="); /* Sets TMP just for this and any "children" */ /* processes -- doesn't change parents TMP */ if ( (aFilename = _tempnam(filepath, "")) == NULL ) { banner(); printf("Oops! Cannot create the temporary work file\n\a"); exit(1); } } void banner(void) { /* display copyright banner, with shaded splashes in IBMOEM encoding */ printf("\n\xb0\xb1\xb2\xdb" " BlOut 2.8" "\xdb\xb2\xb1\xb0" "\nFreeware remove excess blank lines from text files." 
"\nCopyright: (c) 1993-2017 Roedy Green, Canadian Mind Products" "\n#101 - 2536 Wark Street, Victoria, BC Canada V8T 4G8" "\ntel:(250) 361-9093 mailto:roedyg@mindprod.com http://mindprod.com" "\nMay be used freely for non-military use only" "\n\n"); } /* banner */ /* ==================================== */ void honk (void) { /* make a noise */ printf("\a"); } /* =================================== */ void die (void) { honk(); if ( before ) { fclose(before); } if ( after ) { fclose(after); } exit(1); /* exit with errorlevel = 1 */ } /* die */ /* ==================================== */ void bloutOneFile( const char * filename ) { int c; /* the character just read */ int i; /* loop counter */ int pendingTabs[ maxDepth+1 ]; /* how many tabs in Nth run of leading tabs */ int pendingSpaces [ maxDepth+1 ]; /* how many spacels in Nth run of leading spaces */ int depth = 0; /* how many runs of tabs/spaces there are. 0 is the usual case, just spaces, just tabs, or tabs then spaces. */ int pendingNLs = 0; /* how many NLs we have read without writing them out */ int nonWhiteSpaceCharsSeen = 0; pendingSpaces [ 0 ] = 0; pendingTabs [ 0 ] = 0; bFilename = filename; safeFilename(); bldAFilename(); if ( fopen_s(&before, bFilename, "rt") ) { banner(); printf("Oops! Cannot open file %s\n\a", bFilename); exit(1); } if ( fopen_s(&after, aFilename, "wt") ) { banner(); printf("Oops! Cannot open work file %s\n\a", aFilename); exit(1); } setvbuf(before, NULL, _IOFBF, 8192); setvbuf(after, NULL, _IOFBF, 8192); // C runtime converts \r\n --> \n on input and \n --> \r\n on output. 
while ( (c = fgetc(before)) != EOF ) { switch ( c ) { case ' ': pendingSpaces[ depth ]++; break; case '\t': if ( pendingSpaces[ depth ] && depth < maxDepth ) { depth++; pendingSpaces [ depth ] = 0; pendingTabs [ depth ] = 1; } else { pendingTabs[ depth ]++; } break; case '\n': depth = 0; pendingSpaces [ 0 ] = 0; pendingTabs [ 0 ] = 0; pendingNLs++; break; case '\r': /* skip any stray \r */ case '\x1a': /* Skip any ^z */ break; default: /* alpha, numeric, punctuation, control chars */ // emit pending NLs if ( nonWhiteSpaceCharsSeen ) { if ( pendingNLs > tolerate ) { pendingNLs = tolerate; } while ( pendingNLs > 0 ) { fputc( '\n', after ); pendingNLs--; } } else { /* prune all NLs off the top of the file */ pendingNLs = 0; } // emit alternating runs of tabs and spaces. for ( i=0; i<=depth; i++ ) { while ( pendingTabs[i] > 0 ) { fputc( '\t', after ); pendingTabs[i]--; } while ( pendingSpaces[i] > 0 ) { fputc( ' ', after ); pendingSpaces[i]--; } } depth = 0; // emit the non whitespace char fputc(c, after); nonWhiteSpaceCharsSeen = 1; break; } /* end switch */ } /* end for reading characters */ // make sure there was a terminating \n // no matter what is pending, terminate the file with single \n // If file is pure whitespace, we produce a completely empty file. if ( nonWhiteSpaceCharsSeen ) { fputc('\n', after); } /* Rename output to input */ if ( before ) { fclose(before); } if ( after ) { fclose(after); } remove(bFilename); rename(aFilename, bFilename); } /* ==================================== */ int main(int argc, char *argv[]) { int i; tolerate = 2; // collapse blank line to 1 /* 0=blout.Exe 1=MyFile.C */ if ( argc < 2 ) { banner(); printf( "Oops! Usage: blout.exe [-compact] Myfile.c anotheFile.txt\n\a" ); exit( 1 ); } /* process each file on the command line */ for ( i=1; i