278 lines
8.6 KiB
PHP
278 lines
8.6 KiB
PHP
<?php
|
|
/**
|
|
* Tokenizes doc block comments.
|
|
*
|
|
* @author Greg Sherwood <gsherwood@squiz.net>
|
|
* @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
|
|
* @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
|
|
*/
|
|
|
|
namespace PHP_CodeSniffer\Tokenizers;
|
|
|
|
use PHP_CodeSniffer\Util;
|
|
|
|
class Comment
|
|
{
|
|
|
|
|
|
/**
|
|
* Creates an array of tokens when given some PHP code.
|
|
*
|
|
* Starts by using token_get_all() but does a lot of extra processing
|
|
* to insert information about the context of the token.
|
|
*
|
|
* @param string $string The string to tokenize.
|
|
* @param string $eolChar The EOL character to use for splitting strings.
|
|
* @param int $stackPtr The position of the first token in the file.
|
|
*
|
|
* @return array
|
|
*/
|
|
public function tokenizeString($string, $eolChar, $stackPtr)
|
|
{
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
|
|
}
|
|
|
|
$tokens = [];
|
|
$numChars = strlen($string);
|
|
|
|
/*
|
|
Doc block comments start with /*, but typically contain an
|
|
extra star when they are used for function and class comments.
|
|
*/
|
|
|
|
$char = ($numChars - strlen(ltrim($string, '/*')));
|
|
$openTag = substr($string, 0, $char);
|
|
$string = ltrim($string, '/*');
|
|
|
|
$tokens[$stackPtr] = [
|
|
'content' => $openTag,
|
|
'code' => T_DOC_COMMENT_OPEN_TAG,
|
|
'type' => 'T_DOC_COMMENT_OPEN_TAG',
|
|
'comment_tags' => [],
|
|
];
|
|
|
|
$openPtr = $stackPtr;
|
|
$stackPtr++;
|
|
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
$content = Util\Common::prepareForOutput($openTag);
|
|
echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
|
|
}
|
|
|
|
/*
|
|
Strip off the close tag so it doesn't interfere with any
|
|
of our comment line processing. The token will be added to the
|
|
stack just before we return it.
|
|
*/
|
|
|
|
$closeTag = [
|
|
'content' => substr($string, strlen(rtrim($string, '/*'))),
|
|
'code' => T_DOC_COMMENT_CLOSE_TAG,
|
|
'type' => 'T_DOC_COMMENT_CLOSE_TAG',
|
|
'comment_opener' => $openPtr,
|
|
];
|
|
|
|
if ($closeTag['content'] === false) {
|
|
$closeTag['content'] = '';
|
|
}
|
|
|
|
$string = rtrim($string, '/*');
|
|
|
|
/*
|
|
Process each line of the comment.
|
|
*/
|
|
|
|
$lines = explode($eolChar, $string);
|
|
$numLines = count($lines);
|
|
foreach ($lines as $lineNum => $string) {
|
|
if ($lineNum !== ($numLines - 1)) {
|
|
$string .= $eolChar;
|
|
}
|
|
|
|
$char = 0;
|
|
$numChars = strlen($string);
|
|
|
|
// We've started a new line, so process the indent.
|
|
$space = $this->collectWhitespace($string, $char, $numChars);
|
|
if ($space !== null) {
|
|
$tokens[$stackPtr] = $space;
|
|
$stackPtr++;
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
$content = Util\Common::prepareForOutput($space['content']);
|
|
echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
|
|
}
|
|
|
|
$char += strlen($space['content']);
|
|
if ($char === $numChars) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ($string === '') {
|
|
continue;
|
|
}
|
|
|
|
if ($lineNum > 0 && $string[$char] === '*') {
|
|
// This is a function or class doc block line.
|
|
$char++;
|
|
$tokens[$stackPtr] = [
|
|
'content' => '*',
|
|
'code' => T_DOC_COMMENT_STAR,
|
|
'type' => 'T_DOC_COMMENT_STAR',
|
|
];
|
|
|
|
$stackPtr++;
|
|
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
|
|
}
|
|
}
|
|
|
|
// Now we are ready to process the actual content of the line.
|
|
$lineTokens = $this->processLine($string, $eolChar, $char, $numChars);
|
|
foreach ($lineTokens as $lineToken) {
|
|
$tokens[$stackPtr] = $lineToken;
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
$content = Util\Common::prepareForOutput($lineToken['content']);
|
|
$type = $lineToken['type'];
|
|
echo "\t\tCreate comment token: $type => $content".PHP_EOL;
|
|
}
|
|
|
|
if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
|
|
$tokens[$openPtr]['comment_tags'][] = $stackPtr;
|
|
}
|
|
|
|
$stackPtr++;
|
|
}
|
|
}//end foreach
|
|
|
|
$tokens[$stackPtr] = $closeTag;
|
|
$tokens[$openPtr]['comment_closer'] = $stackPtr;
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
$content = Util\Common::prepareForOutput($closeTag['content']);
|
|
echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
|
|
}
|
|
|
|
if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
|
echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
|
|
}
|
|
|
|
return $tokens;
|
|
|
|
}//end tokenizeString()
|
|
|
|
|
|
/**
|
|
* Process a single line of a comment.
|
|
*
|
|
* @param string $string The comment string being tokenized.
|
|
* @param string $eolChar The EOL character to use for splitting strings.
|
|
* @param int $start The position in the string to start processing.
|
|
* @param int $end The position in the string to end processing.
|
|
*
|
|
* @return array
|
|
*/
|
|
private function processLine($string, $eolChar, $start, $end)
|
|
{
|
|
$tokens = [];
|
|
|
|
// Collect content padding.
|
|
$space = $this->collectWhitespace($string, $start, $end);
|
|
if ($space !== null) {
|
|
$tokens[] = $space;
|
|
$start += strlen($space['content']);
|
|
}
|
|
|
|
if (isset($string[$start]) === false) {
|
|
return $tokens;
|
|
}
|
|
|
|
if ($string[$start] === '@') {
|
|
// The content up until the first whitespace is the tag name.
|
|
$matches = [];
|
|
preg_match('/@[^\s]+/', $string, $matches, 0, $start);
|
|
if (isset($matches[0]) === true
|
|
&& substr(strtolower($matches[0]), 0, 7) !== '@phpcs:'
|
|
) {
|
|
$tagName = $matches[0];
|
|
$start += strlen($tagName);
|
|
$tokens[] = [
|
|
'content' => $tagName,
|
|
'code' => T_DOC_COMMENT_TAG,
|
|
'type' => 'T_DOC_COMMENT_TAG',
|
|
];
|
|
|
|
// Then there will be some whitespace.
|
|
$space = $this->collectWhitespace($string, $start, $end);
|
|
if ($space !== null) {
|
|
$tokens[] = $space;
|
|
$start += strlen($space['content']);
|
|
}
|
|
}
|
|
}//end if
|
|
|
|
// Process the rest of the line.
|
|
$eol = strpos($string, $eolChar, $start);
|
|
if ($eol === false) {
|
|
$eol = $end;
|
|
}
|
|
|
|
if ($eol > $start) {
|
|
$tokens[] = [
|
|
'content' => substr($string, $start, ($eol - $start)),
|
|
'code' => T_DOC_COMMENT_STRING,
|
|
'type' => 'T_DOC_COMMENT_STRING',
|
|
];
|
|
}
|
|
|
|
if ($eol !== $end) {
|
|
$tokens[] = [
|
|
'content' => substr($string, $eol, strlen($eolChar)),
|
|
'code' => T_DOC_COMMENT_WHITESPACE,
|
|
'type' => 'T_DOC_COMMENT_WHITESPACE',
|
|
];
|
|
}
|
|
|
|
return $tokens;
|
|
|
|
}//end processLine()
|
|
|
|
|
|
/**
|
|
* Collect consecutive whitespace into a single token.
|
|
*
|
|
* @param string $string The comment string being tokenized.
|
|
* @param int $start The position in the string to start processing.
|
|
* @param int $end The position in the string to end processing.
|
|
*
|
|
* @return array|null
|
|
*/
|
|
private function collectWhitespace($string, $start, $end)
|
|
{
|
|
$space = '';
|
|
for ($start; $start < $end; $start++) {
|
|
if ($string[$start] !== ' ' && $string[$start] !== "\t") {
|
|
break;
|
|
}
|
|
|
|
$space .= $string[$start];
|
|
}
|
|
|
|
if ($space === '') {
|
|
return null;
|
|
}
|
|
|
|
$token = [
|
|
'content' => $space,
|
|
'code' => T_DOC_COMMENT_WHITESPACE,
|
|
'type' => 'T_DOC_COMMENT_WHITESPACE',
|
|
];
|
|
|
|
return $token;
|
|
|
|
}//end collectWhitespace()
|
|
|
|
|
|
}//end class
|