SideEffectsDetector.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. <?php
  2. namespace staabm\SideEffectsDetector;
  /**
   * Token-based heuristic that reports which kinds of side effects a snippet of PHP code may have.
   *
   * The code is never executed: it is tokenized with token_get_all() and every token is
   * matched against lists of known keywords, functions and variable names.
   */
  final class SideEffectsDetector
  {
      /**
       * Tokens whose presence is reported as scope pollution.
       *
       * Kept as a property (not a constant) because T_ENUM is appended in the constructor
       * when the running PHP version defines it.
       *
       * @var array<int>
       */
      private array $scopePollutingTokens = [
          T_CLASS,
          T_FUNCTION,
          T_NEW,
          T_EVAL,
          T_GLOBAL,
          T_GOTO,
          T_HALT_COMPILER,
          T_INCLUDE,
          T_INCLUDE_ONCE,
          T_REQUIRE,
          T_REQUIRE_ONCE,
          T_THROW,
          T_UNSET,
          T_UNSET_CAST,
      ];

      /**
       * Operators that separate an object or class from one of its members.
       *
       * T_NULLSAFE_OBJECT_OPERATOR is appended in the constructor when it is defined.
       *
       * @var array<int>
       */
      private array $memberAccessTokens = [
          T_OBJECT_OPERATOR,
          T_DOUBLE_COLON,
      ];

      /** Tokens reported as SideEffect::PROCESS_EXIT. */
      private const PROCESS_EXIT_TOKENS = [
          T_EXIT,
      ];

      /** Tokens reported as SideEffect::STANDARD_OUTPUT. */
      private const OUTPUT_TOKENS = [
          T_PRINT,
          T_ECHO,
          T_INLINE_HTML,
      ];

      /** Lowercase function names reported as SideEffect::SCOPE_POLLUTION. */
      private const SCOPE_POLLUTING_FUNCTIONS = [
          'putenv',
          'setlocale',
          'class_exists',
          'ini_set',
      ];

      /** Lowercase function names reported as SideEffect::STANDARD_OUTPUT. */
      private const STANDARD_OUTPUT_FUNCTIONS = [
          'printf',
          'vprintf',
      ];

      /** Lowercase function names reported as SideEffect::INPUT_OUTPUT. */
      private const INPUT_OUTPUT_FUNCTIONS = [
          'fopen',
          'file_get_contents',
          'file_put_contents',
          'fwrite',
          'fputs',
          'fread',
          'unlink',
      ];

      /** Variable names that isNonLocalVariable() treats as reaching outside the local scope. */
      private const NON_LOCAL_VARIABLES = [
          '$this',
          '$GLOBALS',
          '$_SERVER',
          '$_GET',
          '$_POST',
          '$_FILES',
          '$_COOKIE',
          '$_SESSION',
          '$_REQUEST',
          '$_ENV',
      ];

      /**
       * @var array<string, array{'hasSideEffects': bool}>
       */
      private array $functionMetadata;

      /**
       * Loads the function metadata shipped next to this file and registers the
       * version-dependent tokens.
       *
       * @throws \RuntimeException when functionMetadata.php does not return an array
       */
      public function __construct()
      {
          $functionMeta = require __DIR__ . '/functionMetadata.php';
          if (!is_array($functionMeta)) {
              throw new \RuntimeException('Invalid function metadata');
          }
          $this->functionMetadata = $functionMeta;

          // Both constants only exist on newer PHP versions, hence the runtime checks.
          if (defined('T_ENUM')) {
              $this->scopePollutingTokens[] = T_ENUM;
          }
          if (defined('T_NULLSAFE_OBJECT_OPERATOR')) {
              $this->memberAccessTokens[] = T_NULLSAFE_OBJECT_OPERATOR;
          }
      }

      /**
       * Scans the given PHP source and returns the distinct side effects found, in order
       * of first appearance.
       *
       * @api
       *
       * @param string $code PHP source; it is passed to token_get_all() unchanged
       *
       * @return array<SideEffect::*>
       */
      public function getSideEffects(string $code): array
      {
          $tokens = token_get_all($code);
          $tokenCount = count($tokens);
          $sideEffects = [];

          for ($i = 0; $i < $tokenCount; $i++) {
              $token = $tokens[$i];

              // Single-character tokens are plain strings and carry no information here.
              if (!is_array($token)) {
                  continue;
              }

              // The `function` keyword of a closure is skipped; its body is still scanned.
              if ($this->isAnonymousFunction($tokens, $i)) {
                  continue;
              }

              if (in_array($token[0], self::OUTPUT_TOKENS, true)) {
                  $sideEffects[] = SideEffect::STANDARD_OUTPUT;
                  continue;
              }

              if (in_array($token[0], self::PROCESS_EXIT_TOKENS, true)) {
                  $sideEffects[] = SideEffect::PROCESS_EXIT;
                  continue;
              }

              if (in_array($token[0], $this->scopePollutingTokens, true)) {
                  $sideEffects[] = SideEffect::SCOPE_POLLUTION;

                  $i++;
                  if (in_array($token[0], [T_FUNCTION, T_CLASS], true)) {
                      $this->consumeWhitespaces($tokens, $i);
                  }

                  // Consume the function/class name so it is not inspected as a call.
                  // Together with the loop increment this also steps over the next token.
                  if (
                      array_key_exists($i, $tokens)
                      && is_array($tokens[$i])
                      && $tokens[$i][0] === T_STRING
                  ) {
                      $i++;
                  }
                  continue;
              }

              // A method name must not be looked up as a global function:
              // `$file->unlink()` is an unknown method, not a call to unlink().
              if ($this->isMemberCall($tokens, $i)) {
                  $sideEffects[] = SideEffect::MAYBE;
                  continue;
              }

              $functionCall = $this->getFunctionCall($tokens, $i);
              if ($functionCall !== null) {
                  $callSideEffect = $this->getFunctionCallSideEffect($functionCall);
                  if ($callSideEffect !== null) {
                      $sideEffects[] = $callSideEffect;
                  }
                  continue;
              }

              if ($this->getMethodCall($tokens, $i) !== null) {
                  $sideEffects[] = SideEffect::MAYBE;
                  continue;
              }

              if ($this->getPropertyAccess($tokens, $i) !== null) {
                  $sideEffects[] = SideEffect::SCOPE_POLLUTION;
                  continue;
              }

              if ($this->isNonLocalVariable($tokens, $i)) {
                  $sideEffects[] = SideEffect::SCOPE_POLLUTION;
              }
          }

          return array_values(array_unique($sideEffects));
      }

      /**
       * Classifies a call to the named global function.
       *
       * Lookup order: the hard-coded lists, then the loaded function metadata, then
       * reflection on the function's declared return type.
       *
       * @return SideEffect::*|null null when no side effect is known for the function
       */
      private function getFunctionCallSideEffect(string $functionName): ?string // @phpstan-ignore return.unusedType
      {
          // PHP resolves function names case-insensitively, so `PRINTF()` must be
          // classified exactly like `printf()`.
          $normalizedName = strtolower($functionName);

          if (in_array($normalizedName, self::STANDARD_OUTPUT_FUNCTIONS, true)) {
              return SideEffect::STANDARD_OUTPUT;
          }
          if (in_array($normalizedName, self::INPUT_OUTPUT_FUNCTIONS, true)) {
              return SideEffect::INPUT_OUTPUT;
          }
          if (in_array($normalizedName, self::SCOPE_POLLUTING_FUNCTIONS, true)) {
              return SideEffect::SCOPE_POLLUTION;
          }

          // Prefer the name as written, then fall back to its lowercase form.
          $metadataKey = null;
          if (array_key_exists($functionName, $this->functionMetadata)) {
              $metadataKey = $functionName;
          } elseif (array_key_exists($normalizedName, $this->functionMetadata)) {
              $metadataKey = $normalizedName;
          }

          if ($metadataKey !== null) {
              if ($this->functionMetadata[$metadataKey]['hasSideEffects'] === true) {
                  return SideEffect::UNKNOWN_CLASS;
              }

              return null;
          }

          try {
              $reflectionFunction = new \ReflectionFunction($functionName);
              $returnType = $reflectionFunction->getReturnType();
              if ($returnType === null) {
                  return SideEffect::MAYBE; // no reflection information -> we don't know
              }
              if ((string) $returnType === 'void') {
                  return SideEffect::UNKNOWN_CLASS; // functions with void return type must have side-effects
              }
          } catch (\ReflectionException $e) {
              return SideEffect::MAYBE; // function does not exist -> we don't know
          }

          return null;
      }

      /**
       * Returns the identifier at $index when it is a T_STRING followed (after optional
       * whitespace) by an opening parenthesis, otherwise null.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       */
      private function getFunctionCall(array $tokens, int $index): ?string
      {
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || $tokens[$index][0] !== T_STRING
          ) {
              return null;
          }
          $functionName = $tokens[$index][1];

          $index++;
          $this->consumeWhitespaces($tokens, $index);

          if (array_key_exists($index, $tokens) && $tokens[$index] === '(') {
              return $functionName;
          }

          return null;
      }

      /**
       * Tells whether the token at $index is a member name being called, i.e. an
       * identifier followed by `(` and preceded (ignoring whitespace) by a member operator.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       */
      private function isMemberCall(array $tokens, int $index): bool
      {
          if ($this->getFunctionCall($tokens, $index) === null) {
              return false;
          }

          // Walk back over whitespace to the token in front of the name.
          $previous = $index - 1;
          while (
              array_key_exists($previous, $tokens)
              && is_array($tokens[$previous])
              && $tokens[$previous][0] === T_WHITESPACE
          ) {
              $previous--;
          }

          return array_key_exists($previous, $tokens)
              && is_array($tokens[$previous])
              && in_array($tokens[$previous][0], $this->memberAccessTokens, true);
      }

      /**
       * Matches `<variable|identifier> <member operator> <identifier> (` starting at $index.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       *
       * @return string|null callee, operator and method name concatenated, or null when
       *                     the tokens do not form a method call
       */
      private function getMethodCall(array $tokens, int $index): ?string
      {
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || !in_array($tokens[$index][0], [T_VARIABLE, T_STRING], true)
          ) {
              return null;
          }
          $callee = $tokens[$index][1];

          $index++;
          $this->consumeWhitespaces($tokens, $index);
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || !in_array($tokens[$index][0], $this->memberAccessTokens, true)
          ) {
              return null;
          }
          $operator = $tokens[$index][1];

          $index++;
          $this->consumeWhitespaces($tokens, $index);
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || $tokens[$index][0] !== T_STRING
          ) {
              return null;
          }
          $method = $tokens[$index][1];

          $index++;
          $this->consumeWhitespaces($tokens, $index);

          // Only a call when `(` really follows. A token stream that ends right after the
          // member name is a property access, not a method call.
          if (!array_key_exists($index, $tokens) || $tokens[$index] !== '(') {
              return null;
          }

          return $callee . $operator . $method;
      }

      /**
       * Matches `<variable|identifier> <member operator> <identifier|variable>` starting at $index.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       *
       * @return string|null the three parts concatenated, or null when the tokens do not match
       */
      private function getPropertyAccess(array $tokens, int $index): ?string
      {
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || !in_array($tokens[$index][0], [T_VARIABLE, T_STRING], true)
          ) {
              return null;
          }
          $objectOrClass = $tokens[$index][1];

          $index++;
          $this->consumeWhitespaces($tokens, $index);
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || !in_array($tokens[$index][0], $this->memberAccessTokens, true)
          ) {
              return null;
          }
          $operator = $tokens[$index][1];

          $index++;
          $this->consumeWhitespaces($tokens, $index);
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || !in_array($tokens[$index][0], [T_STRING, T_VARIABLE], true)
          ) {
              return null;
          }
          $propName = $tokens[$index][1];

          return $objectOrClass . $operator . $propName;
      }

      /**
       * Tells whether the token at $index is a `function` keyword that is directly
       * followed (after optional whitespace) by `(`, i.e. a function without a name.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       */
      private function isAnonymousFunction(array $tokens, int $index): bool
      {
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || $tokens[$index][0] !== T_FUNCTION
          ) {
              return false;
          }

          $index++;
          $this->consumeWhitespaces($tokens, $index);

          return array_key_exists($index, $tokens) && $tokens[$index] === '(';
      }

      /**
       * Tells whether the token at $index is `$this` or one of the listed superglobals.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       */
      private function isNonLocalVariable(array $tokens, int $index): bool
      {
          if (
              !array_key_exists($index, $tokens)
              || !is_array($tokens[$index])
              || $tokens[$index][0] !== T_VARIABLE
          ) {
              return false;
          }

          return in_array($tokens[$index][1], self::NON_LOCAL_VARIABLES, true);
      }

      /**
       * Advances $index past any run of T_WHITESPACE tokens; $index is left untouched
       * when it does not point at whitespace.
       *
       * @param array<int, array{0:int,1:string,2:int}|string|int> $tokens
       */
      private function consumeWhitespaces(array $tokens, int &$index): void
      {
          while (
              array_key_exists($index, $tokens)
              && is_array($tokens[$index])
              && $tokens[$index][0] === T_WHITESPACE
          ) {
              $index++;
          }
      }
  }