* @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
* @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tokenizers;

use PHP_CodeSniffer\Util;

class Comment
{


    /**
     * Splits the text of a doc block comment into comment tokens.
     *
     * The leading run of "/" and "*" characters becomes the open tag token and
     * the trailing run becomes the close tag token. Everything in between is
     * split on $eolChar and each line is broken down into whitespace, star,
     * tag and string tokens.
     *
     * The open tag token receives a 'comment_tags' list holding the stack
     * pointers of all T_DOC_COMMENT_TAG tokens, and a 'comment_closer' entry
     * pointing at the close tag. The close tag token receives a
     * 'comment_opener' entry pointing back at the open tag.
     *
     * @param string $string   The comment text to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array Tokens indexed by consecutive stack pointers, starting at $stackPtr.
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        $verbose = (PHP_CODESNIFFER_VERBOSITY > 1);

        if ($verbose === true) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = [];
        $numChars = strlen($string);

        // Doc block comments start with a slash and a star, but typically
        // contain an extra star when they are used for function and class
        // comments, so the opener is the complete leading run of both.
        $stripped  = ltrim($string, '/*');
        $char      = ($numChars - strlen($stripped));
        $lastChars = substr($string, -2);
        if ($char === $numChars && $lastChars === '*/') {
            // Edge case: nothing but slashes and stars, so the ltrim() above
            // consumed the closer as well. Keep the final two characters
            // back so the close tag token does not end up empty.
            $openTag = substr($string, 0, -2);
            $string  = $lastChars;
        } else {
            $openTag = substr($string, 0, $char);
            $string  = $stripped;
        }

        $tokens[$stackPtr] = [
            'content'      => $openTag,
            'code'         => T_DOC_COMMENT_OPEN_TAG,
            'type'         => 'T_DOC_COMMENT_OPEN_TAG',
            'comment_tags' => [],
        ];

        $openPtr = $stackPtr;
        $stackPtr++;

        if ($verbose === true) {
            $content = Util\Common::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        // Strip off the close tag so it doesn't interfere with any of the
        // comment line processing. The token is only added to the stack
        // once all lines have been processed.
        $inner    = rtrim($string, '/*');
        $closeTag = [
            'content'        => substr($string, strlen($inner)),
            'code'           => T_DOC_COMMENT_CLOSE_TAG,
            'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
            'comment_opener' => $openPtr,
        ];

        // Older PHP versions return false from substr() when there is
        // nothing left to return.
        if ($closeTag['content'] === false) {
            $closeTag['content'] = '';
        }

        // Process each line of the comment.
        $lines    = explode($eolChar, $inner);
        $numLines = count($lines);
        foreach ($lines as $lineNum => $line) {
            // explode() removed the EOL characters; put them back on every
            // line except the last so they get tokenized too.
            if ($lineNum !== ($numLines - 1)) {
                $line .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($line);

            // We've started a new line, so process the indent.
            $space = $this->collectWhitespace($line, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if ($verbose === true) {
                    $content = Util\Common::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // The line held nothing but indentation and no EOL.
                    break;
                }
            }

            if ($line === '') {
                continue;
            }

            if ($lineNum > 0 && $line[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = [
                    'content' => '*',
                    'code'    => T_DOC_COMMENT_STAR,
                    'type'    => 'T_DOC_COMMENT_STAR',
                ];

                $stackPtr++;

                if ($verbose === true) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->processLine($line, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if ($verbose === true) {
                    $content = Util\Common::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                // Record the position of every tag on the comment opener.
                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;

        if ($verbose === true) {
            $content = Util\Common::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only when present: the padding whitespace, a tag
     * token followed by its trailing whitespace, the remaining text of the
     * line, and the EOL character(s).
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array A list of tokens, without stack pointers assigned.
     */
    private function processLine($string, $eolChar, $start, $end)
    {
        $tokens = [];

        // Collect content padding.
        $space = $this->collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // The \G assertion anchors the match at $start. Without it, a
            // lone "@" followed by whitespace would let preg_match() find
            // a tag further along the line, and the token content would no
            // longer correspond to the text at the current position.
            $matches = [];
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);

            // Annotations starting with "@phpcs:" are deliberately not
            // turned into tag tokens; they fall through to the string token.
            if (isset($matches[0]) === true
                && substr(strtolower($matches[0]), 0, 7) !== '@phpcs:'
            ) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = [
                    'content' => $tagName,
                    'code'    => T_DOC_COMMENT_TAG,
                    'type'    => 'T_DOC_COMMENT_TAG',
                ];

                // Then there will be some whitespace.
                $space = $this->collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = [
                'content' => substr($string, $start, ($eol - $start)),
                'code'    => T_DOC_COMMENT_STRING,
                'type'    => 'T_DOC_COMMENT_STRING',
            ];
        }

        // The EOL character(s) get their own whitespace token.
        if ($eol !== $end) {
            $tokens[] = [
                'content' => substr($string, $eol, strlen($eolChar)),
                'code'    => T_DOC_COMMENT_WHITESPACE,
                'type'    => 'T_DOC_COMMENT_WHITESPACE',
            ];
        }

        return $tokens;

    }//end processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs are collected; EOL characters are not.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null The whitespace token, or NULL when there is no
     *                    space or tab at the start position.
     */
    private function collectWhitespace($string, $start, $end)
    {
        if ($start >= $end) {
            return null;
        }

        // Length of the run of spaces/tabs beginning at $start, limited
        // to the characters before $end.
        $length = (int) strspn($string, " \t", $start, ($end - $start));
        if ($length === 0) {
            return null;
        }

        return [
            'content' => substr($string, $start, $length),
            'code'    => T_DOC_COMMENT_WHITESPACE,
            'type'    => 'T_DOC_COMMENT_WHITESPACE',
        ];

    }//end collectWhitespace()


}//end class