/* Align converts tabs to spaces, and aligns columns. usage : Align Myfile.TXT - aligns comma or space-separated data in columns. - expands tabs to spaces - trims off ^z Eof char - ensures each line ends in CrLf - inserts space to separate columns, removing commas. - handles spaces embedded in C-style comments or quotes as part of the same column. Align 1.5 Copyright: (c) 1993-2017 Roedy Green, Canadian Mind Products #101 - 2536 Wark Street Victoria, BC Canada V8T 4G8 tel:(250) 361-9093 mailto:roedyg@mindprod.com http://mindprod.com Compiled as a Windows console App, without precompiled headers. application type:exe mfc: use standard windows libraries no ATL charset : multibyte no common language runtime no precompiled headers. /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_VC80_UPGRADE=0x0600" /D "_MBCS" /Gm /EHsc /RTC1 /MTd /Fo".\Debug/" /Fd".\Debug/" /FR".\Debug\\" /W3 /nologo /c /ZI /TP /errorReport:prompt For Visual C++ Express options see http://mindprod.com/jgloss/cpp.html#COMPILING Version History: 1.0 1998-09-20 1.1 1998-11-08 embed new address 1.2 2008-01-02 convert to Windows from 8.3 DOS. convert to POSIX convert to safe char handling library 1.3 2008-01-03 correct placement of temp display banner on any error 1.4 2008-04-09 get working on MS Visual Studio 1.5 2009-03-11 bundle with ANT build script 1.6 2010-02-03 rename variables and methods to Java stardards expand maxCols from 40 to 1024 */ /* ==================================== */ #include /* exit, putenv, _splitpath */ #include /* isUpper isLower toUpper */ #include /* fclose, fgetc, printf, remove, rename, setvbuf */ #include #include /* getch, putch */ #include #define esc '\x1B' #define maxCols 1024 /* maximum number of columns we can align */ /* ==================================== */ typedef enum Category { comment, quote, code, whitespace, newline, eof } Category; /* ==================================== */ /* use all global variables and no parameter passing for simplicity. */ int padding = 2; /* how much padding to put * between the cols */ FILE *before; /* input filer containing no ^Z * chars, except possibly at the * end */ FILE *After; /* output file with tabs * expanded to tabs */ char *bFilename; /* pointer to string name of * file we will convert */ char *aFilename; /* pointer to string name of the * temporary output file */ int biggestWidth[maxCols]; /* width of widest field * in given column in * any record */ int colIndex = -1; /* which field/column we are * working on. 0 is first */ int width = 0; /* width of current column */ int cols = 0; /* how many columns there are */ int pass = 1; /* pass=1 when deciding col * widths, and pass=2 when * outputting */ /* ==================================== */ /* P R O T O T Y P E S */ void banner(void); void bldAName(void); Category categorize(int); void die (void); void doAPass(void); void endField(void); void endLine(void); void honk (void); void inField(int c); int main (int argc, char *argv[]); void safeFilename(char *fname); void startField(void); void startLine(void); /* ==================================== */ int main(int argc, char *argv[]) /* main Align */ { if ( argc != 2 /* 0=Align.Exe 1=MyFile.Txt */ ) { banner(); printf("Oops! usage: ALIGN Myfile.TXT\n\a"); exit(1); } bFilename = argv[1]; /* Want first arg */ /* grab just a pointer. */ /* Don't copy the string. */ safeFilename(bFilename); /* check before we open */ bldAName(); /* Go build a temp after name */ if ( fopen_s(&before, bFilename, "rt") ) { banner(); printf("Oops! Cannot open file %s\n\a", bFilename); exit(1); } setvbuf(before,NULL,_IOFBF,40*512); if ( fopen_s(&After, aFilename, "wt") ) { banner(); printf("Oops! Cannot open file %s\n\a", aFilename); exit(1); } setvbuf(After,NULL,_IOFBF,40*512); /* clear all column widths */ for ( colIndex = 0; colIndex < maxCols; colIndex++ ) biggestWidth[colIndex] = 0; pass = 1; doAPass(); /* calculate how wide each column is and store it in * colIndex */ /* pad the column widths to put a little space between * the columns */ for ( colIndex = 0; colIndex < cols; colIndex++ ) biggestWidth[colIndex] += padding; /* reset input file to beginning */ fseek(before, 0, SEEK_SET); pass = 2; doAPass(); /* repass the file, this time * copying to the output file */ /* Rename output to input */ fclose(before); fclose(After); remove(bFilename); rename(aFilename, bFilename); return(0); } /* main Align */ /* ==================================== */ void doAPass(void) /* Calculate how wide each column is and store it in * biggestWidth[colIndex] on pass1. First column in index * 0. On pass2, generate the output. */ { int c; /* char just read */ int blankState = 0; /** blankState - we implement the algorithm as a finite state machine. =0 reading leading blanks on a field. =1 reading non-blanks or quotes in middle of a field. =2 reading trailing blanks. when call endField v v v ____xxxx____,__,__xxx___ ^ ^ ^ when call startField */ categorize(EOF); /* reset */ startLine(); while ( (c = getc(before)) != EOF ) { switch ( categorize(c) ) { case whitespace: /* blanks */ switch ( blankState ) { case 0: blankState = 0; break; case 1: endField(); blankState = 2; break; case 2: blankState = 2; break; } break; case code: if ( c == ',' ) { switch ( blankState ) /* comma */ { case 0: startField(); /* null field */ endField(); blankState = 0; break; case 1: endField(); blankState = 0; break; case 2: blankState = 0; break; } } else { switch ( blankState ) /* ordinary non-blank */ { case 0: startField(); inField(c); blankState = 1; break; case 1: inField(c); blankState = 1; break; case 2: startField(); inField(c); blankState = 1; break; } } /* end else */ break; case comment: /* treat comments like a * non-blank */ case quote: /* something in quoted string */ switch ( blankState ) { case 0: startField(); inField(c); blankState = 1; break; case 1: inField(c); blankState = 1; break; case 2: startField(); inField(c); blankState = 1; break; } break; case newline: /* new line */ switch ( blankState ) { case 0: break; case 1: endField(); break; case 2: break; } endLine(); startLine(); blankState = 0; break; } /* end switch on char */ } /* end while */ } /* end doAPass */ /* ==================================== */ void startLine(void) { switch ( pass ) { case 1: colIndex = -1; width = 0; break; case 2: colIndex = -1; width = 0; break; } return; } /* ==================================== */ void endLine(void) { switch ( pass ) { case 1: break; case 2: putc('\n', After); break; } return; } /* ==================================== */ void startField(void) /* Field may have lead and trail spaces on it. We have just * hit the first non-blank. */ { switch ( pass ) { case 1: width = 0; if ( ++colIndex > (cols - 1) ) { cols = colIndex + 1; } break; case 2: width = 0; ++colIndex; break; } return; } /* ==================================== */ void inField(int c) /* Field may have lead and trail spaces on it. This is * called to process non-space chars in the middle of a * field. */ { switch ( pass ) { case 1: if ( colIndex >= maxCols ) { printf("Oops! This version of Align can only handle %d columns.", maxCols ); die(); } if ( ++width > biggestWidth[colIndex] ) { biggestWidth[colIndex] = width; } break; case 2: ++width; putc(c, After); break; } return; } /* ==================================== */ void endField(void) /* Field may have lead and trail spaces on it. We just hit * the first space etc. after the last non-blank. */ { switch ( pass ) { case 1: break; case 2: if ( colIndex < (cols - 1) ) { /* pad all but the last column * with, then spaces */ for ( width = biggestWidth[colIndex] - width; /* how many chars too * short we are. */ width; width-- ) { putc(' ', After); /* pad column on right * with * spaces, AFTER * the comma */ } } break; } return; } /* ==================================== */ Category categorize(int c) /** accept a character and categorize it. * * comment -- inside // or /* comment * quote -- inside single or double quote string * code -- normal code * whitespace -- whitespace in code. * Whitespace in comments and quotes counts as comment or * quote. * newline -- newline character. Newline inside comment counts * as comment. * eof -- end of file. also resets state. * * Comments require two chars to start them. The first char will be * considered code, and only the second as comment. */ { static int commentState = 0; /** commentState remembered between calls. We implement this as yet another finite state automaton. =0 normal C code =1 inside a " " =2 inside a ' ' =3 just seen "\ =4 just seen '\ =5 inside |* *| =6 just seen | =7 just seen |* ... * =8 inside || */ switch ( c ) { case EOF: /* end of file or reset */ commentState = 0; return(eof); case '\n': /* new line */ switch ( commentState ) { case 0: /* normal code */ commentState = 0; return(newline); case 1: /* inside a " " */ commentState = 0; return(newline); case 2: /* inside a ' ' */ commentState = 0; return(newline); case 3: /* just seen "\ */ commentState = 0; return(newline); case 4: /* just seen '\ */ commentState = 0; return(newline); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen / */ commentState = 0; return(newline); case 7: /* just seen |* ... * */ commentState = 5; return(comment); case 8: /* inside || C++ style comment */ commentState = 0; return(newline); } case ' ': /* blanks */ case '\t': /* treat tab as white space. */ case '\x1a': /* treat ^Z as white space. */ switch ( commentState ) { case 0: /* normal code */ commentState = 0; return(whitespace); case 1: /* inside a " " */ commentState = 1; return(quote); case 2: /* inside a ' ' */ commentState = 2; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen / */ commentState = 0; return(whitespace); case 7: /* just seen |* ... * */ commentState = 5; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } case '\"': /* double quote */ switch ( commentState ) { case 0: /* normal code */ commentState = 1; return(quote); case 1: /* inside a " " */ commentState = 0; return(quote); case 2: /* inside a ' ' */ commentState = 2; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen / */ commentState = 1; return(quote); case 7: /* just seen |* ... * */ commentState = 5; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } case '\'': /* singlequote */ switch ( commentState ) { case 0: /* normal code */ commentState = 2; return(quote); case 1: /* inside a " " */ commentState = 1; return(quote); case 2: /* inside a ' ' */ commentState = 0; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen / */ commentState = 1; return(quote); case 7: /* just seen |* ... * */ commentState = 5; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } case '*': /* star */ switch ( commentState ) { case 0: /* normal code */ commentState = 0; return(code); case 1: /* inside a " " */ commentState = 1; return(quote); case 2: /* inside a ' ' */ commentState = 2; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 7; return(comment); case 6: /* just seen | */ commentState = 5; return(comment); case 7: /* just seen |* ... * */ commentState = 7; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } case '\\': /* backslash */ switch ( commentState ) { case 0: /* normal code */ commentState = 0; return(code); case 1: /* inside a " " */ commentState = 3; return(quote); case 2: /* inside a ' ' */ commentState = 4; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen | */ commentState = 0; return(code); case 7: /* just seen |* ... * */ commentState = 5; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } case '/': /* forwardslash */ switch ( commentState ) { case 0: /* normal code */ commentState = 6; return(code); /* might be comment, but don't * know that yet */ case 1: /* inside a " " */ commentState = 1; return(quote); case 2: /* inside a ' ' */ commentState = 2; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen | */ commentState = 8; return(comment); case 7: /* just seen |* ... * */ commentState = 0; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } default: /* non blank */ switch ( commentState ) { case 0: /* normal code */ commentState = 0; return(code); case 1: /* inside a " " */ commentState = 1; return(quote); case 2: /* inside a ' ' */ commentState = 2; return(quote); case 3: /* just seen "\ */ commentState = 1; return(quote); case 4: /* just seen '\ */ commentState = 2; return(quote); case 5: /* inside |* *| comment */ commentState = 5; return(comment); case 6: /* just seen | */ commentState = 0; return(code); case 7: /* just seen |* ... * */ commentState = 7; return(comment); case 8: /* inside || C++ style comment */ commentState = 8; return(comment); } } /* end switch(c) */ exit(2); /* should never happen */ return(eof); } /* end Categorize */ /* ==================================== */ void safeFilename(char* bFilename) { /* Ensure appropriate file name extensions. good =.ASM .PAS .etc - done without prompt bad =.EXE .COM .OBJ - abort warning =.DOC & others */ static const char * GoodExtensions [] = { ".C", ".CPP", ".H", ".HPP", ".RH", ".IH", ".TXT", ".ASM",".PAS",".BAT",".CTL",".CMD", ".LST",".MAC",".TXT",".ANS", ".USE",".KEY",0}; /* 0 is just end marker */ static const char * BadExtensions [] = { ".EXE",".COM",".OBJ",0}; int Response; /* Y or N, yes Virginia, int, C is weird */ char Extension[_MAX_EXT]; int i; /* local loop counter */ _splitpath_s( bFilename, NULL /* drive */, 0, NULL /* dir */, 0, NULL /* name */, 0, Extension, _MAX_EXT); _strupr_s(Extension, _MAX_EXT); /* convert to upper case for compare */ for ( i=0 ; GoodExtensions[i]; i++ ) { if ( strcmp(Extension,GoodExtensions[i])==0 ) { /* match, it is Good */ return; } } for ( i=0 ; BadExtensions[i] ; i++ ) { if ( strcmp(Extension,BadExtensions[i])==0 ) { /* match, it is bad */ banner(); printf("Oops! Align cannot be used on EXE COM or OBJ files " "such as %s\n", bFilename); die(); } } /* just give a warning */ printf("Warning!\n" /* new line to give room for long filename */ "Align is not usually used on %s files such as %s\n", Extension,bFilename); printf("Do you want to align the file anyway?" " (Y)es (N)o "); while ( 1 ) /* loop forever till user enters Y or N */ { honk(); Response = _getch(); /* not echoed because user might hit tab or Enter */ /* and mess up the screen */ Response = toupper(Response); /* toupper is a macro, so needs simple argument */ switch ( Response ) { case 'Y': printf("Yes\n"); return; case 'N': printf("No\n"); /* fallthru */ case esc : printf("\nAlign aborted\n"); die(); /* others, keep looping */ } } } /* SafeFileName */ /* ==================================== */ void bldAName(void) { char drive[_MAX_DRIVE]; char dir[_MAX_DIR]; char name[_MAX_FNAME]; char ext[_MAX_EXT]; char filepath[_MAX_DRIVE + _MAX_DIR]; _splitpath_s(bFilename, drive, _MAX_DRIVE, dir,_MAX_DIR, name, _MAX_FNAME, ext, _MAX_EXT); strcpy_s(filepath, _MAX_DRIVE + _MAX_DIR, drive); strcat_s(filepath, _MAX_DRIVE + _MAX_DIR, dir); /* Force to current directory if empty */ if ( strcmp(filepath, "") == 0 ) { strcpy_s(filepath, _MAX_DRIVE + _MAX_DIR, "."); } else { filepath[strlen(filepath) - 1] = 0; } _putenv("TMP="); /* Sets TMP just for this and * any "children" */ /* processes -- doesn't change parents TMP */ if ( (aFilename = _tempnam(filepath, "")) == NULL ) { banner(); printf("Oops! Cannot create the temporary work file\n\a"); exit(1); } } void banner(void) { /* display copyright banner, with shaded splashes */ printf("\n" "\xb0\xb1\xb2\xdb" " Align 1.6 " "\xdb\xb2\xb1\xb0" "\nFreeware to align columns." "\nCopyright: (c) 1993-2017 Roedy Green, Canadian Mind Products" "\n" "#101 - 2536 Wark Street, Victoria, BC Canada V8T 4G8" "\n" "tel:(250) 361-9093 mailto:roedyg@mindprod.com http://mindprod.com" "\n" "May be used freely for non-military use only" "\n\n"); } /* banner */ /* ==================================== */ void honk (void) { /* make a noise */ printf("\a"); } /* =================================== */ void die (void) { honk(); fclose (before); fclose (After); exit(1); /* exit with errorlevel = 1 */ } /* die */ /* ==================================== */ /* -30- */