参考文档:

通过达成以下目标来取得学习成果

  1. 扩展 SQLite 的 json_extract 中的 path 语法
  2. 扩展 SQLite 语法,支持 -> 和 ->> 语法糖

# 扩展 SQLite 的 json_extract 中的 path 语法

官方文档给出了 SQLite 中 json_extract 函数支持的 path

For functions that accept PATH arguments, that PATH must be well-formed or else the function will throw an error. A well-formed PATH is a text value that begins with exactly one '$' character followed by zero or more instances of ".objectlabel" or "[arrayindex]".

可以看到只支持点属性和下标,不支持通配符 ***

  • json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$')'{"a":2,"c":[4,5,{"f":7}]}'
  • json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c')'[4,5,{"f":7}]'
  • json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2]')'{"f":7}'
  • json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2].f')7
  • json_extract('{"a":2,"c":[4,5],"f":7}', '$.c','$.a')'[[4,5],2]'
  • json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.x')NULL
  • json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.x', '$.a')'[null,2]'

扩展目标是实现 MySQL 8.0 中的 path 语法

pathExpression:
    scope[(pathLeg)*]

pathLeg:
    member | arrayLocation | doubleAsterisk

member:
    period ( keyName | asterisk )

arrayLocation:
    leftBracket ( nonNegativeInteger | asterisk ) rightBracket

keyName:
    ESIdentifier | doubleQuotedString

doubleAsterisk:
    '**'

period:
    '.'

asterisk:
    '*'

leftBracket:
    '['

rightBracket:
    ']'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

简要描述为以下规则

  1. 支持 . + 属性 的形式,其中属性可以是普通的值,也可以是双引号引起来的字符串(因为 json key 中可能有各种奇怪的字符)
  2. 支持 .* 的形式匹配对象的所有属性
  3. 支持 [非负整数] 的形式匹配数组元素
  4. 支持 [*] 的形式匹配数组中的所有元素
  5. 支持 ** 的形式匹配任意深度路径,不能作为结束

需要处理的是规则 2、4、5

举例

mysql> SELECT JSON_EXTRACT('{"a": 30, "b": 40}', '$.*');
+-------------------------------------------+
| JSON_EXTRACT('{"a": 30, "b": 40}', '$.*') |
+-------------------------------------------+
| [30, 40]                                  |
+-------------------------------------------+
1 row in set (0.00 sec)

mysql> SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[2][*]');
+-----------------------------------------------+
| JSON_EXTRACT('[10, 20, [30, 40]]', '$[2][*]') |
+-----------------------------------------------+
| [30, 40]                                      |
+-----------------------------------------------+
1 row in set (0.00 sec)

mysql> SELECT JSON_EXTRACT('[10, 20, {"a": 30, "b": 40}]', '$**.a');
+-------------------------------------------------------+
| JSON_EXTRACT('[10, 20, {"a": 30, "b": 40}]', '$**.a') |
+-------------------------------------------------------+
| [30]                                                  |
+-------------------------------------------------------+
1 row in set (0.00 sec)

mysql> SELECT JSON_EXTRACT('{"b": 10, "c": 20, "d": {"a": 30, "b": 40}}', '$**.a');
+----------------------------------------------------------------------+
| JSON_EXTRACT('{"b": 10, "c": 20, "d": {"a": 30, "b": 40}}', '$**.a') |
+----------------------------------------------------------------------+
| [30]                                                                 |
+----------------------------------------------------------------------+
1 row in set (0.00 sec)

mysql> SELECT JSON_EXTRACT('{"b": 10, "c": 20, "a": {"a": {"a": 30, "b": 40}}}', '$**.a');
+-----------------------------------------------------------------------------+
| JSON_EXTRACT('{"b": 10, "c": 20, "a": {"a": {"a": 30, "b": 40}}}', '$**.a') |
+-----------------------------------------------------------------------------+
| [{"a": {"a": 30, "b": 40}}, {"a": 30, "b": 40}, 30]                         |
+-----------------------------------------------------------------------------+
1 row in set (0.00 sec)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

# 获得编译体验

  1. download 页 下载 autoconf 版的源码

  2. 在 WSL 环境中编译

    ./configure
    make
    
    1
    2
  3. 执行验证

    sqlite> select json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$');
    {"a":2,"c":[4,5,{"f":7}]}
    sqlite> SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]');
    20
    sqlite> SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[2][*]');
    Error: JSON path error near '[*]'
    sqlite> .exit
    
    1
    2
    3
    4
    5
    6
    7

整个过程行云流水,到处都是优秀 C 项目的特有香味

# 修改源文件

json 支持是以扩展的形式添加的

很容易依次定位到 sqlite.c 中的函数

/*
** Search along zPath to find the node specified.  Return a pointer
** to that node, or NULL if zPath is malformed or if there is no such
** node.
**
** If pApnd!=0, then try to append new nodes to complete zPath if it is
** possible to do so and if no existing node corresponds to zPath.  If
** new nodes are appended *pApnd is set to 1.
*/
static JsonNode *jsonLookupStep(
  JsonParse *pParse,      /* The JSON to search */
  u32 iRoot,              /* Begin the search at this node */
  const char *zPath,      /* The path to search */
  int *pApnd,             /* Append nodes to complete path if not NULL */
  const char **pzErr      /* Make *pzErr point to any syntax error in zPath */
){
  u32 i, j, nKey;
  const char *zKey;
  JsonNode *pRoot = &pParse->aNode[iRoot];
  if( zPath[0]==0 ) return pRoot;
  if( pRoot->jnFlags & JNODE_REPLACE ) return 0;
  if( zPath[0]=='.' ){
    if( pRoot->eType!=JSON_OBJECT ) return 0;
    zPath++;
    if( zPath[0]=='"' ){
      zKey = zPath + 1;
      for(i=1; zPath[i] && zPath[i]!='"'; i++){}
      nKey = i-1;
      if( zPath[i] ){
        i++;
      }else{
        *pzErr = zPath;
        return 0;
      }
    }else{
      zKey = zPath;
      for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){}
      nKey = i;
    }
    if( nKey==0 ){
      *pzErr = zPath;
      return 0;
    }
    j = 1;
    for(;;){
      while( j<=pRoot->n ){
        if( jsonLabelCompare(pRoot+j, zKey, nKey) ){
          return jsonLookupStep(pParse, iRoot+j+1, &zPath[i], pApnd, pzErr);
        }
        j++;
        j += jsonNodeSize(&pRoot[j]);
      }
      if( (pRoot->jnFlags & JNODE_APPEND)==0 ) break;
      iRoot += pRoot->u.iAppend;
      pRoot = &pParse->aNode[iRoot];
      j = 1;
    }
    if( pApnd ){
      u32 iStart, iLabel;
      JsonNode *pNode;
      iStart = jsonParseAddNode(pParse, JSON_OBJECT, 2, 0);
      iLabel = jsonParseAddNode(pParse, JSON_STRING, nKey, zKey);
      zPath += i;
      pNode = jsonLookupAppend(pParse, zPath, pApnd, pzErr);
      if( pParse->oom ) return 0;
      if( pNode ){
        pRoot = &pParse->aNode[iRoot];
        pRoot->u.iAppend = iStart - iRoot;
        pRoot->jnFlags |= JNODE_APPEND;
        pParse->aNode[iLabel].jnFlags |= JNODE_RAW;
      }
      return pNode;
    }
  }else if( zPath[0]=='[' && safe_isdigit(zPath[1]) ){
    if( pRoot->eType!=JSON_ARRAY ) return 0;
    i = 0;
    j = 1;
    while( safe_isdigit(zPath[j]) ){
      i = i*10 + zPath[j] - '0';
      j++;
    }
    if( zPath[j]!=']' ){
      *pzErr = zPath;
      return 0;
    }
    zPath += j + 1;
    j = 1;
    for(;;){
      while( j<=pRoot->n && (i>0 || (pRoot[j].jnFlags & JNODE_REMOVE)!=0) ){
        if( (pRoot[j].jnFlags & JNODE_REMOVE)==0 ) i--;
        j += jsonNodeSize(&pRoot[j]);
      }
      if( (pRoot->jnFlags & JNODE_APPEND)==0 ) break;
      iRoot += pRoot->u.iAppend;
      pRoot = &pParse->aNode[iRoot];
      j = 1;
    }
    if( j<=pRoot->n ){
      return jsonLookupStep(pParse, iRoot+j, zPath, pApnd, pzErr);
    }
    if( i==0 && pApnd ){
      u32 iStart;
      JsonNode *pNode;
      iStart = jsonParseAddNode(pParse, JSON_ARRAY, 1, 0);
      pNode = jsonLookupAppend(pParse, zPath, pApnd, pzErr);
      if( pParse->oom ) return 0;
      if( pNode ){
        pRoot = &pParse->aNode[iRoot];
        pRoot->u.iAppend = iStart - iRoot;
        pRoot->jnFlags |= JNODE_APPEND;
      }
      return pNode;
    }
  }else{
    *pzErr = zPath;
  }
  return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

读懂没有什么困难,递归解析,而且也没考虑转移字符的问题,比如以下的查询无法在 sqlite 中执行

mysql> SELECT JSON_EXTRACT('{"a\\"": 30, "b": 40}', '$."a\\""');
+---------------------------------------------------+
| JSON_EXTRACT('{"a\\"": 30, "b": 40}', '$."a\\""') |
+---------------------------------------------------+
| 30                                                |
+---------------------------------------------------+
1 row in set (0.00 sec)
1
2
3
4
5
6
7

对应的 MySQL 中的代码在 Item_func_json_extract::val_json

分为三步:

  1. 验证、解析、缓存 path m_path_cache.parse_and_cache_path(args, i, false)
  2. 对每个 path 找到对应的 json 结果 w.seek(*path, path->leg_count(), &v, true, false)
  3. 根据 path 个数和特征封装结果为 json 数组或对象 if (could_return_multiple_matches) {