Code Coverage

| | Lines (%) | Lines (count) | Functions and Methods (%) | Functions and Methods (count) | CRAP | Classes and Traits (%) | Classes and Traits (count) |
|---|---|---|---|---|---|---|---|
| Total | 100.00% | 70 / 70 | 100.00% | 6 / 6 | | 100.00% | 1 / 1 |
| NameFormatLexer | 100.00% | 70 / 70 | 100.00% | 6 / 6 | 20 | 100.00% | 1 / 1 |
| tokenize | 100.00% | 34 / 34 | 100.00% | 1 / 1 | 9 | | |
| processBracketGroup | 100.00% | 19 / 19 | 100.00% | 1 / 1 | 3 | | |
| closingBracketPosition | 100.00% | 11 / 11 | 100.00% | 1 / 1 | 5 | | |
| isModifierChar | 100.00% | 1 / 1 | 100.00% | 1 / 1 | 1 | | |
| isConditionChar | 100.00% | 1 / 1 | 100.00% | 1 / 1 | 1 | | |
| buildPiece | 100.00% | 4 / 4 | 100.00% | 1 / 1 | 1 | | |
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Drupal\name\Utility; |
| 6 | |
/**
 * Walks a name format string and produces condition-tagged piece arrays.
 *
 * A "piece" is an associative array with two keys:
 * - 'value' (string): the resolved, modifier-transformed token value.
 * - 'conditions' (string): accumulated condition characters.
 *
 * A backslash escapes the character that follows it, so that character is
 * emitted literally instead of being read as a modifier, condition, bracket
 * or token. The backslash can escape itself: "\\" yields one literal
 * backslash and leaves the character after it unescaped. A lone backslash at
 * the very end of the format has nothing to escape and is dropped.
 *
 * The lexer intentionally carries no state; all inputs are passed as
 * arguments. Bracket groups are resolved recursively via
 * NameFormatAssembler::assemble().
 *
 * @internal
 */
final class NameFormatLexer {

  /**
   * Characters that are collected as modifiers for the next piece.
   */
  private const MODIFIER_CHARS = ['L', 'U', 'F', 'T', 'S', 'G', 'B', 'b'];

  /**
   * Characters that are collected as conditions for the next piece.
   */
  private const CONDITION_CHARS = ['=', '^', '|', '+', '-', '~'];

  /**
   * Walks a format string and produces an array of condition-tagged pieces.
   *
   * Modifier and condition characters are accumulated and attached to the
   * next piece that is emitted (an escaped literal, a bracket group or a
   * resolved token); they are cleared after every emitted piece.
   *
   * @param string $format
   *   The format string or segment to parse.
   * @param array $tokens
   *   The token map, passed through to NameFormatTokens::resolveValue().
   *
   * @return array[]
   *   The generated pieces, in format order. Each piece has the keys 'value'
   *   and 'conditions'.
   */
  public static function tokenize(string $format, array $tokens): array {
    $pieces = [];
    $modifiers = '';
    $conditions = '';
    // TRUE while the previous character was an unescaped backslash. Tracking
    // this explicitly (rather than peeking at the previous byte) is what lets
    // "\\" produce a literal backslash without also escaping what follows.
    $escaped = FALSE;
    $length = strlen($format);

    for ($i = 0; $i < $length; $i += 1) {
      $char = $format[$i];

      if ($escaped) {
        // Escaped characters are always literal, whatever they are.
        $pieces[] = self::buildPiece($char, $modifiers, $conditions);
        $modifiers = '';
        $conditions = '';
        $escaped = FALSE;
        continue;
      }

      if ($char === '\\') {
        // Pending modifiers/conditions are kept; they apply to the escaped
        // literal that follows.
        $escaped = TRUE;
        continue;
      }

      if (self::isModifierChar($char)) {
        $modifiers .= $char;
        continue;
      }

      if (self::isConditionChar($char)) {
        $conditions .= $char;
        continue;
      }

      if ($char === '(' || $char === ')') {
        $result = self::processBracketGroup($format, $i, $tokens, $modifiers, $conditions);
        $pieces[] = $result['piece'];
        // Skip the characters consumed by a matched group; the loop's own
        // increment then steps past the closing bracket.
        $i += $result['advance'];
        $modifiers = '';
        $conditions = '';
        continue;
      }

      $pieces[] = self::buildPiece(
        NameFormatTokens::resolveValue($char, $tokens),
        $modifiers,
        $conditions,
      );
      $modifiers = '';
      $conditions = '';
    }

    return $pieces;
  }

  /**
   * Processes a bracketed segment or preserves an unmatched bracket.
   *
   * When an opening bracket is found and a matching closing bracket exists,
   * the inner segment is recursively tokenized, assembled into a string and
   * wrapped in a single piece carrying the given modifiers and conditions.
   * A closing bracket, or an opening bracket without a match, is preserved
   * as a literal character.
   *
   * @param string $format
   *   The full format string being parsed.
   * @param int $position
   *   The current position of the bracket character.
   * @param array $tokens
   *   The token map.
   * @param string $modifiers
   *   Accumulated modifier characters.
   * @param string $conditions
   *   Accumulated condition characters.
   *
   * @return array{piece: array, advance: int}
   *   The assembled piece and the number of extra characters to advance past
   *   (0 when the bracket was kept as a literal).
   */
  public static function processBracketGroup(
    string $format,
    int $position,
    array $tokens,
    string $modifiers,
    string $conditions,
  ): array {
    $char = $format[$position];

    // Only an opening bracket can start a group; the offset returned is
    // relative to $position.
    $closing_bracket = ($char === '(')
      ? self::closingBracketPosition(substr($format, $position))
      : FALSE;

    if ($closing_bracket === FALSE) {
      return [
        'piece' => self::buildPiece($char, $modifiers, $conditions),
        'advance' => 0,
      ];
    }

    // Everything strictly between the two brackets.
    $segment = substr($format, $position + 1, $closing_bracket - 1);
    $sub_string = NameFormatAssembler::assemble(self::tokenize($segment, $tokens));

    return [
      'piece' => self::buildPiece($sub_string, $modifiers, $conditions),
      'advance' => $closing_bracket,
    ];
  }

  /**
   * Returns the closing bracket position matching the first opening bracket.
   *
   * Any character preceded by an unescaped backslash is ignored during the
   * depth scan, so \( and \) do not count as brackets, while a bracket that
   * follows an escaped backslash (\\) does. This mirrors the escape handling
   * in tokenize().
   *
   * @param string $string
   *   The string starting with the opening bracket character.
   *
   * @return int|false
   *   The zero-based position of the closing bracket, or FALSE when not found.
   */
  public static function closingBracketPosition(string $string): int|false {
    $depth = 0;
    $escaped = FALSE;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i += 1) {
      if ($escaped) {
        // This character is a literal; it cannot open or close a group.
        $escaped = FALSE;
        continue;
      }

      $char = $string[$i];
      if ($char === '\\') {
        $escaped = TRUE;
      }
      elseif ($char === '(') {
        $depth += 1;
      }
      elseif ($char === ')') {
        $depth -= 1;
        if ($depth === 0) {
          return $i;
        }
      }
    }

    return FALSE;
  }

  /**
   * Returns TRUE when the character is a modifier.
   *
   * @param string $char
   *   A single format character.
   *
   * @return bool
   *   TRUE when the character is a modifier, otherwise FALSE.
   */
  public static function isModifierChar(string $char): bool {
    return in_array($char, self::MODIFIER_CHARS, TRUE);
  }

  /**
   * Returns TRUE when the character is a condition flag.
   *
   * @param string $char
   *   A single format character.
   *
   * @return bool
   *   TRUE when the character is a condition, otherwise FALSE.
   */
  public static function isConditionChar(string $char): bool {
    return in_array($char, self::CONDITION_CHARS, TRUE);
  }

  /**
   * Builds a single piece array after applying modifiers.
   *
   * @param string $value
   *   The resolved token value, literal character or assembled group.
   * @param string $modifiers
   *   Accumulated modifier characters, handed to NameFormatModifiers::apply().
   * @param string $conditions
   *   Accumulated condition characters, stored on the piece unchanged.
   *
   * @return array{value: string, conditions: string}
   *   The piece.
   */
  private static function buildPiece(
    string $value,
    string $modifiers,
    string $conditions,
  ): array {
    return [
      'value' => NameFormatModifiers::apply($value, $modifiers),
      'conditions' => $conditions,
    ];
  }

}