#!/usr/bin/php
<?php
// htoc.php: builds skeleton source files (.c/.cc/.cpp) from header (.h) files
// redirect stderr (ex: "./htoc.php . 2> errors.log") to put any errors into errors.log (doesn't work in tcsh shell of Mac OS 10.3)
// If you edit this file, you MUST save it in unix format (from the file menu in the document's window in BBEdit)
// for info on using php from the command line, visit http://www.sitepoint.com/article/php-command-line-1
// it's best to use fwrite( STDOUT, "string" ); instead of echo if you wish to use this script on windows
// use fwrite( STDERR, "string" ); to log errors
// use $userInput = fgets( STDIN ); or $userInput = fgetc( STDIN ); to read user input from the command line
// MakeSource() takes the full path to a file, and if it ends in ".h", builds the source for the file

// user defines
define( "kAllowForce", false ); // set to true to allow -f (force overwrite) as an option from the command line (far too dangerous when dealing with human-years worth of source code!)

// regular expression defines (pattern bodies only, the callers add the '!' delimiters and the m flag)
define( "kLineComment", "//(.*)" );
define( "kWrapComment", "/\*(.*)\*/" );
define( "kMacro", "^[ \t]*\#(.*)" );
define( "kTemplate", "^(template[ \t]*<[^>]+>[ \t\n\r]*)" );
define( "kOptionalTemplate", "^(template[ \t]*<[^>]+>[ \t\n\r]*)?" );
define( "kClass", "^(class|struct)[ \t]+([a-zA-Z0-9_]+(<[^>]+>)?)[ \t\n\r]*[:{]" );
define( "kFunction", "^([^\(#\r/@].*[ \t]*)?\*?([A-Za-z0-9~_]+(<[^>]*>)?::[-A-Za-z0-9~_+=<>\|\*/]+|[A-Za-z0-9~_]+)[ \t\r]*\(" );
define( "kClassWithOptionalTemplate", "^(template[ \t]*<[^>]+>[ \t\n\r]*)?(class|struct)[ \t]+([a-zA-Z0-9_]+(<[^>]+>)?)[ \t\n\r]*[:{]" );
define( "kFunctionWithOptionalTemplate", "^(template[ \t]*<[^>]+>[ \t\n\r]*)?^([^ \t\(#\r/@].*[ \t]*)?\*?([A-Za-z0-9~_]+(<[^>]*>)?::[-A-Za-z0-9~_+=<>\|\*/]+|[A-Za-z0-9~_]+)[ \t\r]*\(" );

// a special character to use as a placeholder for comments.
// It MUST be exactly one byte long: comments are overwritten byte for byte (see CommentCallback) so that
// offsets into the stripped code stay valid offsets into the original code. A multi-byte literal (such as a
// bullet saved in UTF-8) breaks that alignment, so the bell character is used.
define( "kSpecialChar", chr( 7 ) );

// preg_replace_callback() callback: blanks out a macro/define/pragma line
// $matches[0] is the matched text; returns the same number of bytes, all spaces
function MacroCallback( $matches )
{
    return( str_repeat( ' ', strlen( $matches[0] ) ) );
}

// preg_replace_callback() callback: replaces a comment with the bell character, a reserved character in c++ which is never used
// $matches[0] is the matched text; returns kSpecialChar repeated once per byte of the match
function CommentCallback( $matches )
{
    return( str_repeat( kSpecialChar, strlen( $matches[0] ) ) );
}

// repeats over the letters, keeping track of nesting, and returns the offset of the closing char
// pass 0 to ignore length. Also, if the first char is not one of ([{<³ and no close char has been given, then false is returned
// ex: CloseBlock( '( foo( bar() ) ) abcdefg()', 0, 20 ) returns 15.
// If no closing char is found, false is returned
// $haystack is the text to scan and $offset is the position of the opening char.
// $length, when greater than 0, limits the scan to $length chars starting at $offset.
// $close is the closing char to look for; when empty it is chosen from the char found at $offset.
function CloseBlock( $haystack, $offset = 0, $length = 0, $close = '' )
{
    $offset = (int) $offset; // callers may hand over the false returned by a failed strpos()
    $end = strlen( $haystack );
    if( $length > 0 && $end > $offset + $length )
        $end = $offset + $length;

    // nothing to scan, or the opening position lies outside of the scanned range
    if( $end <= 1 || $offset < 0 || $offset >= $end )
        return( false );

    $open = $haystack[$offset];
    if( $close == '' ) // must choose the close char if none is given
        switch( $open )
        {
            case '(': $close = ')'; break;
            case '[': $close = ']'; break;
            case '{': $close = '}'; break;
            case '<': $close = '>'; break;
            case '³': $close = '²'; break; // NOTE(review): this pair looks like a mis-encoded quote pair; $open is a single byte so a multi-byte literal can never match - confirm the intended chars
            default: return( false );
        }

    $depth = 0;
    for( $spot = $offset; $spot < $end; $spot++ )
    {
        if( $haystack[$spot] == $open )
            $depth++;
        if( $haystack[$spot] == $close )
        {
            $depth--;
            if( $depth == 0 ) // back at the nesting level of the opening char
                return( $spot );
        }
    }
    return( false );
}

// turns the array built by CodeToTokens() into source code: one empty function body per prototype
// $items is a list of one-entry arrays, keyed 'class' (value: array( declaration => nested items )) or
// 'function' (value: comment.kSpecialChar.prototype)
// $templateName is the accumulated "template <...>" prefix and $className the accumulated "Name::" prefix;
// both are only passed when the routine calls itself for the contents of a class
function TokensToCode( $items, $templateName = '', $className = '' )
{
    $result = '';
    if( !is_array( $items ) ) // nothing was found (older CodeToTokens() returns null in that case)
        return( $result );

    foreach( $items as $val )
    {
        if( key( $val ) == 'class' )
        {
            $class = current( $val );
            $name = key( $class ); // the class declaration is the key of the array
            $name = substr( $name, 0, -1 ); // trim off the curly bracket (or the colon of a base class list)
            $name = rtrim( $name ); // trim off any returns or whitespace from the end of the class name

            $templateArray = explode( '>', $name );
            $thisTemplateName = '';
            if( count( $templateArray ) > 1 ) // template was found
            {
                $thisTemplateName = $templateArray[0].">\n";
                $name = ltrim( $templateArray[1] ); // trim off any returns or whitespace from the beginning of the class name

                // get the template type, ex: "<class T>", and strip the keywords from it so "Foo<T>::" can be built
                if( preg_match( "!\<.*\>!m", $thisTemplateName, $matches, PREG_OFFSET_CAPTURE ) )
                    $name .= preg_replace( '![ \t]*\b(?:class|typename)\b[ \t]*!', '', $matches[0][0] );
            }

            // trim the leading class/struct keyword from the name. It is anchored and bounded so
            // that a name which merely contains the word (ex: "subclass") is left alone
            $name = preg_replace( '!^\s*(?:class|struct)\b[ \t]*!', '', $name );

            // call again, this time with the class name as the prefix
            $result .= TokensToCode( $class[key( $class )], $templateName.$thisTemplateName.'', $className.$name.'::' );
        }
        else
        {
            $entry = current( $val );
            $aboveCommentEnd = strpos( $entry, kSpecialChar );
            if( $aboveCommentEnd === false ) // no marker: the whole entry is the prototype
            {
                $comment = '';
                $function = $entry;
            }
            else
            {
                $comment = substr( $entry, 0, $aboveCommentEnd );
                $function = substr( $entry, $aboveCommentEnd + strlen( kSpecialChar ) ); // skip the whole marker, however many bytes it has
            }

            // insert the prefix before the routine name, ex: void foo::bar( void ){}
            // only the first "name(" is the routine, later ones belong to the params (ex: function pointers)
            $name = preg_replace( "!([^ \t]+[ \t]*\()!m", $className."\\1", $function, 1 );
            $result .= $comment.$templateName.$name."\n{\n\t\n}\n\n";
        }
    }
    return( $result );
}

// loop over the code and return an array of the classes and routines where keys are 'class' or 'function' and values are the items
// ex: CodeToTokens( 'class foo{}; void bar( void ){}' ) returns { 'class'=>'void foo( void ){}', 'function'=>'void bar( void ){}' }
// original is the code before comments have been replaced with whitespace. If you pass in original, no cleanup is done on the code.
// if you pass true for comments, comments next to the prototypes will be written alongside the functions in the .c file
// returns a list (empty when nothing was found) of one-entry arrays:
//   array( 'class' => array( declaration => nested list for the class body ) )
//   array( 'function' => comment.kSpecialChar.prototype )
// only prototypes are returned; routines which already have a body and pure virtual routines are skipped
function CodeToTokens( $code, $comments = false, $original = '' )
{
    if( !$original )
    {
        // first replace carriage return line feed with just line feed, in the case of dos returns style (ASCII 13,10 to 10)
        $code = str_replace( "\r\n", "\n", $code );
        // now replace carriage return with just line feed, in the case of mac returns style (ASCII 13 to 10)
        $code = str_replace( "\r", "\n", $code );

        // use m for PCRE_MULTILINE, since php doesn't seem to recognize the constant
        if( $comments )
        {
            // blank out all macros/defines/pragmas
            $code = preg_replace_callback( "!(".kMacro.")!m", 'MacroCallback', $code );
            $original = $code; // make a backup since we will be replacing comments with a reserved char for later insertion
            // overwrite all comments, byte for byte, so offsets into $code are also offsets into $original
            $code = preg_replace_callback( "!(".kLineComment.")|(".kWrapComment.")!m", 'CommentCallback', $code );
        }
        else
        {
            $code = preg_replace( "!(".kMacro.")|(".kLineComment.")|(".kWrapComment.")!m", '', $code );
            $original = ''; // without comments, original is not used
        }
    }

    $spot = 0;
    $result = array();
    while( true )
    {
        $skip = 0; // extra chars to jump over after the match; reset so a value from the previous item is never reused
        $section = substr( $code, $spot );

        // the first matched item is always in $out[0]
        if( !preg_match( "!(".kFunction.")|(".kClassWithOptionalTemplate.")!m", $section, $out, PREG_OFFSET_CAPTURE ) )
            break;

        $itemSize = strlen( $out[0][0] );
        $item = ltrim( $out[0][0] );
        $offset = $out[0][1]; // $offset is at $code[$spot] + the offset where the string was found
        $matchEnd = $spot + $offset + $itemSize; // position in $code just past the match

        // kFunction always ends in '(' and kClassWithOptionalTemplate always ends in ':' or '{', so the
        // last char tells them apart (testing for a leading "class" misses structs and catches names like classify)
        if( substr( $item, -1 ) != '(' ) // found a class or struct
        {
            $openOffset = $matchEnd - 1; // get the position of the opening curly bracket or colon
            if( $code[$openOffset] == ':' )
                $openOffset = strpos( $code, '{', $openOffset ); // if colon, get position of next opening curly bracket
            $closeOffset = ( $openOffset === false ) ? false : CloseBlock( $code, $openOffset );

            if( $closeOffset !== false )
            {
                $bodyLength = $closeOffset - $openOffset - 1; // the chars between the curly brackets
                // TokensToCode() derives the name from the key, and may only know the class keyword
                $classKey = preg_replace( '!^((?:template[ \t]*<[^>]+>[ \t\n\r]*)?)struct\b!', '${1}class', $item, 1 );
                $result[] = array( 'class' => array( $classKey =>
                    CodeToTokens( substr( $code, $openOffset + 1, $bodyLength ), $comments, substr( $original, $openOffset + 1, $bodyLength ) ) ) );
                $skip = $closeOffset - ( $matchEnd - 1 ); // continue right after the closing curly bracket
            }
            // else unbalanced brackets: only move past the declaration. someday put an exception or warning here
        }
        else // found a function, last character is a '('
        {
            $whiteCount = $itemSize - strlen( $item ); // get number of white chars removed from the beginning
            $pos = strpos( $item, '(' );
            if( $pos !== false )
                $item = substr( $item, 0, $pos + 1 ); // cut at the first paren in case the match ran on to a later one
            $openOffset = $spot + $offset + $whiteCount + strlen( $item ) - 1; // get the position of the opening paren
            $closeOffset = CloseBlock( $code, $openOffset );
            if( $closeOffset )
            {
                if( ( $semi = strpos( $code, ';', $closeOffset ) ) === false )
                    $semi = PHP_INT_MAX; // a prototype
                if( ( $curly = strpos( $code, '{', $closeOffset ) ) === false )
                    $curly = PHP_INT_MAX; // a function
                if( ( $equals = strpos( $code, '=', $closeOffset ) ) === false )
                    $equals = PHP_INT_MAX; // a pure virtual function if inside a class

                // we are only searching out prototypes in a class, or prototypes and functions if not in a class
                if( $semi < $curly && $semi < $equals ) // a prototype or class function
                {
                    $params = substr( $code, $openOffset + 1, $closeOffset - $openOffset - 1 );
                    // this is a style test, many users type their routines "foo( bar )" instead of "foo(bar)"
                    $spaceBefore = ( $code[$closeOffset - 1] == ' ' ) ? ' ' : '';
                    // strip out default params to function
                    $params = preg_replace( "![ \t]*\=[^\,\)]*!m", '', $params );
                    if( substr( $params, -1 ) != ' ' )
                        $params .= $spaceBefore;
                    $params .= ')';

                    // find any comments above the function ***BUG, this line is very slow for some reason, try to optimize
                    $matches = array();
                    if( $comments )
                        preg_match( "!^[ \t\n".kSpecialChar."]+$!m", substr( $section, 0, $offset ), $matches, PREG_OFFSET_CAPTURE );
                    $beforeComment = '';
                    if( !empty( $matches[0][0] ) ) // a comment was found
                    {
                        $beforeComment = trim( substr( $original, $spot + $matches[0][1], strlen( $matches[0][0] ) ) );
                        if( strlen( $beforeComment ) > 0 )
                        {
                            // remove whitespace from the front of comments
                            $beforeComment = preg_replace( "!^[ \t]*(.+)!m", '\1', $beforeComment );
                            $beforeComment .= "\n";
                        }
                    }

                    // find any comments to the right of the function
                    $matches = array();
                    if( $comments )
                        preg_match( "!^[ \t".kSpecialChar."]+\n!", substr( $code, $semi + 1 ), $matches, PREG_OFFSET_CAPTURE );
                    $afterComment = '';
                    if( !empty( $matches[0][0] ) ) // a comment (or just trailing whitespace) was found
                    {
                        $afterComment = trim( substr( $original, $semi + 1 + $matches[0][1], strlen( $matches[0][0] ) ) );
                        if( strlen( $afterComment ) > 0 ) // same rule as above: no blank line when it was only whitespace
                            $afterComment .= "\n";
                    }

                    $result[] = array( 'function' => $beforeComment.$afterComment.kSpecialChar.$item.$params ); // comment.kSpecialChar.function
                }
                else if( $curly < $semi && $curly < $equals ) // strip out functions
                {
                    $closeOffset = CloseBlock( $code, $curly );
                    if( $closeOffset )
                        $skip = $closeOffset - $openOffset;
                    // else someday put an exception or warning here
                }
                else if( $equals < $semi && $equals < $curly ) // strip out pure virtual functions
                {
                    $closeOffset = strpos( $code, ';', $closeOffset );
                    if( $closeOffset !== false )
                        $skip = $closeOffset - $openOffset;
                    // else someday put an exception or warning here
                }
            }
            // else someday put an exception or warning here
        }
        $spot += $offset + $itemSize + $skip;
    }
    return( $result );
}

// this routine creates the source file for the given header file fileName
// files which do not end in ".h" are ignored, and an existing source file is only replaced when $force is set
// uses the globals $suffix (ending of the new file), $comments (copy comments) and $force
// exits with code 2 if the header cannot be opened
function MakeSource( $fileName )
{
    global $suffix, $comments, $force;

    if( substr( $fileName, -2 ) != '.h' )
        return;

    $file = fopen( $fileName, 'r' );
    if( !$file )
    {
        fwrite( STDERR, "file $fileName not found\n" );
        exit( 2 );
    }

    $newFileName = substr( $fileName, 0, -2 ).$suffix;
    if( $force || !file_exists( $newFileName ) )
    {
        $newFile = fopen( $newFileName, 'w' );
        if( $newFile )
        {
            $result = "#include \"$fileName\"\n\n";
            $code = stream_get_contents( $file ); // reads to the end of the file, also fine for an empty header
            if( $code === false )
                $code = '';
            $items = CodeToTokens( $code, $comments );
            $result .= TokensToCode( $items );
            if( fwrite( $newFile, $result ) === false )
                fwrite( STDERR, "Cannot write to file $newFileName\n" );
            fclose( $newFile );
        }
    }
    fclose( $file );
}

// this section loops files based on the command line arguments

// error handler which writes php errors to STDERR instead of STDOUT
// errors silenced with @ or excluded by error_reporting() are handed back to php (return false)
function CliErrorHandler( $errno, $errstr, $errfile, $errline )
{
    if( !( error_reporting() & $errno ) )
        return( false );
    fwrite( STDERR, "$errstr in $errfile on $errline\n" );
    return( true );
}
set_error_handler( 'CliErrorHandler' ); // Tell PHP to use the error handler

// loop over the directory, getting all files inside of it.
// if recursive is true, loop over all subdirectories.
// if a callback is passed, it is called for each file.
// if no callback is passed, a tree of files is returned.
// the tree contains arrays of keys and vals, where keys are files or folders which point to another array
// (files point to null; folders point to their own tree, or to an empty array when recursive is false)
// returns null if the directory cannot be opened, and always returns null when a callback is passed
// note that the callback is also called for each folder, before the folder is entered
function LoopDirectory( $start_directory, $recursive = false, $callback = null )
{
    $dir = @opendir( $start_directory );
    if( !$dir )
        return( null );

    $base = $start_directory;
    if( substr( $base, -1 ) != '/' )
        $base .= '/';

    $tree = array();
    while( false !== ( $file = readdir( $dir ) ) )
    {
        if( $file === '.' || $file === '..' )
            continue;

        $absolute_file = $base.$file;
        if( is_file( $absolute_file ) )
        {
            if( $callback == null )
                $tree[$file] = null;
            else
                $callback( $absolute_file );
        }
        else if( is_dir( $absolute_file ) )
        {
            if( $callback == null )
            {
                if( $recursive )
                    $tree[$file] = LoopDirectory( $absolute_file."/", $recursive, $callback );
                else
                    $tree[$file] = Array(); // set the entry to an array to show a folder for convenience
            }
            else
            {
                $callback( $absolute_file );
                if( $recursive )
                    LoopDirectory( $absolute_file."/", $recursive, $callback );
            }
        }
    }
    closedir( $dir ); // release the handle, deep trees would otherwise keep one open per folder visited

    if( $callback == null )
        return( $tree );
    return( null );
}

// the callback handed to LoopDirectory(): builds the source for one file
function Callback( $fileName )
{
    MakeSource( $fileName );
}

// defaults, changed by the flags in the first command line argument
$start = 1;         // index of the first file/directory argument
$all = false;       // -r: loop over subdirectories too
$suffix = '.c';     // -c: '.cc', -p: '.cpp'
$comments = true;   // -n: do not copy comments
$force = false;     // -f: overwrite existing sources (only if kAllowForce)

if( $argc > 1 && substr( $argv[1], 0, 1 ) == '-' )
{
    $flag = substr( $argv[1], 1 );
    // compare against the empty string (or false from older php) so that the char "0" is not taken as the end of the flags
    while( $flag !== false && $flag !== '' )
    {
        switch( $flag[0] )
        {
            case 'r': $all = true; break;
            case 'c': $suffix = '.cc'; break;
            case 'n': $comments = false; break;
            case 'p': $suffix = '.cpp'; break;
            case 'h':
                fwrite( STDOUT, "examples:\n" );
                fwrite( STDOUT, "./htoc.php // invalid. For safety reasons, must always pass a file or dir, even if simply .\n" );
                fwrite( STDOUT, "./htoc.php . // makes filename.c for every file in the directory\n" );
                fwrite( STDOUT, "./htoc.php filename1.h filename2.h // makes filename1.c and filename2.c for filename1.h and filename2.h\n" );
                fwrite( STDOUT, "./htoc.php somedirectory // makes filename.c for every file in the directory (multiple directories allowed)\n" );
                fwrite( STDOUT, "./htoc.php -c filename.h // makes filename.cc for filename.h\n" );
                fwrite( STDOUT, "./htoc.php -p filename.h // makes filename.cpp for filename.h\n" );
                fwrite( STDOUT, "./htoc.php -n filename.h // makes filename.c with no comments\n" );
                fwrite( STDOUT, "./htoc.php -r . // makes sources recursively for all .h files in the directory\n" );
                fwrite( STDOUT, "./htoc.php -rc . // makes sources recursively for all .h files in the directory, using .cc ending\n" );
                fwrite( STDOUT, "./htoc.php -rp . // makes sources recursively for all .h files in the directory, using .cpp ending\n" );
                fwrite( STDOUT, "./htoc.php -f filename.h // (CAUTION) force overwrite filename.c, even if it already exists. Must define kAllowForce to true in htoc.php to use.\n" );
                fwrite( STDOUT, "./htoc.php -h // view help for htoc.php\n" );
                exit( 0 );
            case 'f':
                if( kAllowForce )
                {
                    $force = true;
                    break;
                }
                // deliberate fall through: -f is an illegal option unless kAllowForce is true
            default:
                fwrite( STDOUT, "htoc.php: illegal option -- {$flag[0]}\n" );
                $start = $argc; // cause improper usage below
                break;
        }
        if( $start >= $argc )
            break; // found an improper usage
        $flag = substr( $flag, 1 );
    }
    $start++;
}

if( $start >= $argc ) // no file or directory was passed, or a flag was illegal
{
    fwrite( STDOUT, "usage: htoc.php [-rcnph" );
    if( kAllowForce )
        fwrite( STDOUT, "f" );
    fwrite( STDOUT, "] filename1.h ...\n" );
    exit( 1 ); // Operation not permitted
}

for( $count = $start; $count < $argc; $count++ )
{
    if( is_file( $argv[$count] ) )
        MakeSource( $argv[$count] );
    else if( is_dir( $argv[$count] ) )
        LoopDirectory( $argv[$count], $all, "Callback" );
    else
        fwrite( STDERR, "htoc.php: {$argv[$count]}: no such file or directory\n" ); // report instead of skipping silently
}
exit( 0 ); // return no error code, the program executed successfully
?>