Nginx parser with PHP and Bison

Today I'll try to parse an Nginx config into an AST (abstract syntax tree).
To test the parser, I took a real Nginx config from the official Symfony documentation.

server {
    server_name domain.tld www.domain.tld;
    root /var/www/project/public;

    location / {
        # try to serve file directly, fallback to index.php
        try_files $uri /index.php$is_args$args;

    location /bundles {
        try_files $uri =404;

    location ~ ^/index\.php(/|$) {
        fastcgi_pass unix:/var/run/php/php-fpm.sock;
        fastcgi_split_path_info ^(.+\.php)(/.*)$;
        include fastcgi_params;

        # optionally set the value of the environment variables used in the application
        fastcgi_param APP_ENV prod;
        fastcgi_param APP_SECRET <app-secret-id>;
        fastcgi_param DATABASE_URL "mysql://db_user:db_pass@host:3306/db_name";
        fastcgi_param SCRIPT_FILENAME $realpath_root$fastcgi_script_name;
        fastcgi_param DOCUMENT_ROOT $realpath_root;       

    location ~ \.php$ {
        return 404;

    error_log /var/log/nginx/project_error.log;
    access_log /var/log/nginx/project_access.log;

First, we need to install the PHP dependencies.

composer require --dev mrsuh/php-bison-skeleton
composer require mrsuh/tree-printer
composer require doctrine/lexer

We will store our files like this:

├── /ast-parser
    ├── /bin
    │   └── parse.php # entry point to parse nginx configs
    ├── /lib
    │   └── parser.php # generated file
    ├── /src
    │   ├── Lexer.php
    │   └── Node.php # AST node
    └── grammar.y       

The Node class must implement Mrsuh\Tree\NodeInterface so that the AST can be printed.


namespace App;

use Mrsuh\Tree\NodeInterface;

/**
 * AST node: a named element with optional attributes and child nodes.
 *
 * Implements NodeInterface so the tree printer can walk the children and
 * render each node through its string representation.
 */
class Node implements NodeInterface
{
    private string $name;
    /** @var array<string, mixed> */
    private array $attributes;
    /** @var Node[] */
    private array $children;

    /**
     * @param string               $name       Node label printed first in the node's line.
     * @param array<string, mixed> $attributes Key/value pairs printed after the name.
     * @param Node[]               $children   Nested nodes.
     */
    public function __construct(string $name, array $attributes = [], array $children = [])
    {
        $this->name       = $name;
        $this->attributes = $attributes;
        $this->children   = $children;
    }

    /**
     * @return Node[]
     */
    public function getChildren(): array
    {
        return $this->children;
    }

    /**
     * Renders the node as "NAME { key: 'value' ... }"; only the name is
     * returned when the node has no attributes.
     */
    public function __toString(): string
    {
        $line = $this->name;
        if (!empty($this->attributes)) {
            $line .= ' {';
            foreach ($this->attributes as $key => $value) {
                // Both placeholders need an argument: the attribute key and its value.
                // Array values are flattened into a comma-separated list.
                $line .= sprintf(
                    " %s: '%s'",
                    $key,
                    is_array($value) ? implode(', ', $value) : $value
                );
            }
            $line .= ' }';
        }

        return $line;
    }
}

This time I'll use the Doctrine Lexer library. It helps to split complex text into tokens.


namespace App;

use Doctrine\Common\Lexer\AbstractLexer;

class Lexer extends AbstractLexer implements LexerInterface
    public function __construct($resource)

    protected function getCatchablePatterns(): array
        return [';'];

    protected function getNonCatchablePatterns(): array
        return [' ','[\n]+','#[^\n]+']; // skip spaces, eol, and comments 

    protected function getType(&$value): int
        switch ($value) {
            case 'server':
                return LexerInterface::T_SERVER;
            case 'server_name':
                return LexerInterface::T_SERVER_NAME;


        return ord($value);

    public function yyerror(string $message): void
        printf("%s\n", $message);

    public function getLVal()
        return $this->token->value;

    public function yylex(): int
        if (!$this->lookahead) {
            return LexerInterface::YYEOF;


        return $this->token->type;

For example, the Lexer will translate the Nginx config below

server {
    server_name domain.tld www.domain.tld;
    root /var/www/project/public;

    location / {
        # try to serve file directly, fallback to index.php
        try_files $uri /index.php$is_args$args;

into this:

word token
server LexerInterface::T_SERVER (258)
{ ASCII (123)
server_name LexerInterface::T_SERVER_NAME (259)
domain.tld LexerInterface::T_SERVER_NAME_VALUE (260)
www.domain.tld LexerInterface::T_SERVER_NAME_VALUE (260)
; ASCII (59)
root LexerInterface::T_SERVER_ROOT (261)
/var/www/project/public LexerInterface::T_SERVER_ROOT_PATH (262)
; ASCII (59)
location LexerInterface::T_LOCATION (263)
/ LexerInterface::T_LOCATION_PATH (264)
{ ASCII (123)
try_files LexerInterface::T_TRY_FILES (283)
$uri LexerInterface::T_TRY_FILES_PATH (284)
/index.php$is_args$args LexerInterface::T_TRY_FILES_PATH (284)
; ASCII (59)
} ASCII (125)
} ASCII (125)
LexerInterface::YYEOF (0)

Time to create the grammar.y file and build lib/parser.php.

We will use the %code parser block to define the variables and methods that store the AST in the Parser class.
You can find the full grammar file here.

%define api.parser.class {Parser}
%define api.namespace {App}
%code parser {
    private Node $ast;
    public function setAst(Node $ast): void { $this->ast = $ast; }
    public function getAst(): Node { return $this->ast; }

%token T_SERVER
%token T_TRY_FILES

  T_SERVER '{' server_body_list '}' { self::setAst(new Node('T_SERVER', [], $3)); }

  T_SERVER_NAME_VALUE                     { $$ = [$1]; }
| server_name_values T_SERVER_NAME_VALUE  { $$ = $1; $$[] = $2; }

  T_SERVER_NAME server_name_values ';'  { $$ = new Node('T_SERVER_NAME', ['names' => $2]); }
| T_SERVER_ROOT T_SERVER_ROOT_PATH ';'  { $$ = new Node('T_SERVER_ROOT', ['path' => $2]); }
| T_ERROR_LOG T_ERROR_LOG_PATH ';'      { $$ = new Node('T_ERROR_LOG', ['path' => $2]); }
| T_ACCESS_LOG T_ACCESS_LOG_PATH ';'    { $$ = new Node('T_ACCESS_LOG', ['path' => $2]); }
bison -S vendor/mrsuh/php-bison-skeleton/src/php-skel.m4 -o lib/parser.php grammar.y

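Command options:

-S vendor/mrsuh/php-bison-skeleton/src/php-skel.m4 — path to the PHP skeleton file
-o lib/parser.php — output file for the generated parser
grammar.y — the grammar file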

The final PHP file is the entry point for the parser.


require_once __DIR__ . '/../vendor/autoload.php';

use App\Parser;
use App\Lexer;
use Mrsuh\Tree\Printer;

$lexer  = new Lexer(fopen($argv[1], 'r'));
$parser = new Parser($lexer);
if (!$parser->parse()) {

$printer = new Printer();

Composer autoload configuration for the generated lib/parser.php file:

    "autoload": {
        "psr-4": {
            "App\\": "src/"
        "files": ["lib/parser.php"]

Finally, we can test our parser.

php bin/parse.php nginx.conf
    ├── T_SERVER_NAME { names: 'domain.tld, www.domain.tld' }
    ├── T_SERVER_ROOT { path: '/var/www/project/public' }
    ├── T_LOCATION { regexp: '' path: '/' }
    │   └── T_TRY_FILES { paths: '$uri, /index.php$is_args$args' }
    ├── T_LOCATION { regexp: '' path: '/bundles' }
    │   └── T_TRY_FILES { paths: '$uri, =404' }
    ├── T_LOCATION { regexp: '~' path: '^/index\.php(/|$)' }
    │   ├── T_FAST_CGI_PATH { path: 'unix:/var/run/php/php-fpm.sock' }
    │   ├── T_FAST_CGI_SPLIT_PATH_INFO { path: '^(.+\.php)(/.*)$' }
    │   ├── T_INCLUDE { path: 'fastcgi_params' }
    │   ├── T_FAST_CGI_PARAM { APP_ENV: 'prod' }
    │   ├── T_FAST_CGI_PARAM { APP_SECRET: '<app-secret-id>' }
    │   ├── T_FAST_CGI_PARAM { DATABASE_URL: '"mysql://db_user:db_pass@host:3306/db_name"' }
    │   ├── T_FAST_CGI_PARAM { SCRIPT_FILENAME: '$realpath_root$fastcgi_script_name' }
    │   ├── T_FAST_CGI_PARAM { DOCUMENT_ROOT: '$realpath_root' }
    │   └── T_INTERNAL
    ├── T_LOCATION { regexp: '~' path: '\.php$' }
    │   └── T_RETURN { code: '404' body: '' }
    ├── T_ERROR_LOG { path: '/var/log/nginx/project_error.log' }
    └── T_ACCESS_LOG { path: '/var/log/nginx/project_access.log' }

It works!

You can get the parser source code here and test it yourself.

Some useful links: