Cython二进制逆向系列（三）运算符

在开始前，先给出本文用到的py源代码

def test1(x, y):
    """Apply every arithmetic and bitwise operator to x and y and print the results.

    Each result is stored in its own local (a..m) and all thirteen values are
    printed on one line. The two shifts use literal shift counts (4 and 2);
    every other binary operator takes both operands from the parameters.
    Nothing is returned.
    """

    #   Arithmetic operators

    a = x + y

    b = x - y

    c = x * y

    d = x / y

    e = x // y

    f = x % y

    g = x ** y

    #   Bitwise operators

    h = x & y

    i = x | y

    j = x ^ y

    k = ~x

    # The shift counts below are literals, not variables.
    l = x >> 4

    m = x << 2

    print(a, b, c, d, e, f, g, h, i, j, k, l, m)

def test2(x, y):
    """Demonstrate the ``in`` / ``not in`` operators and print x and y.

    If x is contained in y, x is rebound to y; otherwise, if x is not
    contained in y, y is rebound to x. y must support membership tests.
    Nothing is returned.
    """

    #   in / not in operators

    if x in y:

        x = y

    elif x not in y:

        y = x

    print(x, y)

def test3(x, y):
    """Print the results of ``and``, ``or`` and ``not`` applied to ``== 0`` comparisons.

    Three lines are printed: ``x == 0 and y == 0``, ``y == 0 or x == 0`` and
    ``not x == 0``. Nothing is returned.
    """

    #  == operator and logical operators

    print(x == 0 and y == 0)

    print(y == 0 or x == 0)

    print(not x==0)

# Script entry point: run each demo once with small sample inputs.
if __name__ == '__main__':

    test1(1, 2)

    # test2 evaluates `x in y`, so y must be a container; passing an int
    # as y would raise TypeError and abort before test3 runs.
    test2(1, [1, 2])

    test3(1, 2)

在这篇文章里，我们会讨论Cython是如何处理运算符的（数学运算符、位运算符、in/not in 运算符、==运算符与逻辑运算符）。总的来说，其中大部分是调用虚拟机api来实现的。

数学运算符与位运算符

可以看得出来全是调用虚拟机的api

下面给出运算符与api的对应表（其实看名字大概都能猜出来）：

符号	含义	函数名
+	加	PyNumber_Add
-	减	PyNumber_Subtract
*	乘	PyNumber_Multiply
/	除	__Pyx_PyNumber_Divide
//	整除	PyNumber_FloorDivide
%	取模	PyNumber_Remainder
**	乘方	PyNumber_Power
&	按位与	PyNumber_And
\|	按位或	PyNumber_Or
^	按位异或	PyNumber_Xor
~	按位取非	PyNumber_Invert
>>	右移	PyNumber_Rshift
<<	左移	PyNumber_Lshift

这里单独看一下位移在ida中的体现

v24 = off_1800095B8[32];

  if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] )

  {

    v27 = PyNumber_Rshift(v4, off_1800095B8[32]);

LABEL_35:

    v4 = v27;

    goto LABEL_36;

  }

  v25 = *(_QWORD *)(v4 + 16);

  if ( v25 )

  {

    if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )

    {

      v26 = v25 + 4;

      switch ( v26 )

      {

        case 2i64:

          v27 = PyLong_FromLongLong(

                  -(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,

                  v26,

                  v24,

                  0x180000000ui64);

          break;

        case 6i64:

          v27 = PyLong_FromLongLong(

                  (__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,

                  v26,

                  v24,

                  0x180000000ui64);

          break;

        default:

          v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);

          break;

      }

    }

    else

    {

      v28 = -*(_DWORD *)(v4 + 24);

      if ( v25 >= 0 )

        v28 = *(_DWORD *)(v4 + 24);

      v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);

    }

    goto LABEL_35;

  }

  ++*(_QWORD *)v4;

LABEL_36:

  if ( !v4 )

  {

    v12 = 2534i64;

    v13 = 13i64;

    goto LABEL_58;

  }

  v10 = (_QWORD *)v4;

off_1800095B8[32]中储存的就是常量4。这里出于安全性考虑，对整数的处理做了额外的检查：先判断对象的类型是否为PyLong_Type，再根据 v4 + 16 处的长度字段分情况处理。在else分支的 PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64); 中，可以直接看到右移的位数是4。

问题是，为什么这里没有像表格那样直接调用PyNumber_Rshift，而只在对象不是PyLong类型的分支里才调用它？因为py源代码中位移的位数是立即数，对于整数对象可以直接转换为c语言的位移运算符。但是如果是x>>y这样两个操作数都是变量，就会直接调用api PyNumber_Rshift。

in/not in 运算符

 /* "test.py":21

 * def test2(x, y):

 *     #   in/not in

 *     if x in y:             # <<<<<<<<<<<<<<

 *         x = y

 *     elif x not in y:

 */

  __pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 21, __pyx_L1_error)

      。。。。。。

 /* "test.py":23

 *     if x in y:

 *         x = y

 *     elif x not in y:             # <<<<<<<<<<<<<<

 *         y = x

 *     print(x, y)

 */

  __pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 23, __pyx_L1_error)

这里涉及到一些条件语句的转换，不过没关系，照样能看懂

在c代码中可以看到无论是in还是 not in 调用的都是函数__Pyx_PySequence_ContainsTF。其前两个参数是前后两个参与运算的变量，而第三个参数Py_EQ/Py_NE则决定当前运算到底是in还是 not in

不幸的是，无论是in还是not in ，在ida中都是PySequence_Contains，具体是哪个要结合上下文分析。比如这里v5 = PySequence_Contains(a3) 判断的是 a3 中是否包含 a2。如果 v5 == 1，表示 a2 在 a3 中，则进入接下来的操作（++*v3 和调整 v4 和 v3 的指向）。

而下面那个v9 = PySequence_Contains(v3) 判断的同样是 v3 中是否包含 v4，只是后续是拿返回值与0比较：当 v9 == 0 时，表示 v4 不在 v3 中，符合 not in 的语义（即 v4 not in v3）。

说人话就是看后续是对PySequence_Contains的返回值和谁比较（1或者0）。

==运算符与逻辑运算符

逻辑与运算符的处理

  /* "test.py":30

 * def test3(x, y):

 *     #  ==

 *     print(x == 0 and y == 0)             # <<<<<<<<<<<<<<

 *     print(y == 0 or x == 0)

 *     print(not x==0)

 */

  __pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)

  __Pyx_GOTREF(__pyx_t_2);

  __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 30, __pyx_L1_error)

  if (__pyx_t_3) {

    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  } else {

    __Pyx_INCREF(__pyx_t_2);

    __pyx_t_1 = __pyx_t_2;

    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

    goto __pyx_L3_bool_binop_done;

  }

  __pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)

  __Pyx_GOTREF(__pyx_t_2);

  __Pyx_INCREF(__pyx_t_2);

  __pyx_t_1 = __pyx_t_2;

  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  __pyx_L3_bool_binop_done:;

__Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0): 这行代码将 x == 0 的比较操作转换为 C 语言函数。它检查 x 是否等于 0。（猜测不同类型的==有对应的函数，暂未验证）。

ida中比较==0的部分，看得出来它把变量分为int float 和其他三种情况，除了整数和浮点，一概用PyObject_RichCompare比较。

在 C 代码中，and 逻辑运算符的处理是短路的。即，如果第一个条件为 False，那么第二个条件不会被计算。在这里，编译后的代码只有在 x == 0 为 True 时才会继续执行 y == 0 的检查；否则直接把 x == 0 的结果作为整个表达式的值，并跳转到 __pyx_L3_bool_binop_done。

然后__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0) 检查 y == 0，把比较结果存入 __pyx_t_2，再赋给 __pyx_t_1 作为整个 and 表达式的结果。

ida中对and的处理也差不多类似。看着有点恶心，全是if else条件分支和各种goto

逻辑或运算符的处理

 /* "test.py":31

 *     #  ==

 *     print(x == 0 and y == 0)

 *     print(y == 0 or x == 0)             # <<<<<<<<<<<<<<

 *     print(not x==0)

 *

 */

  __pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)

  __Pyx_GOTREF(__pyx_t_1);

  __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 31, __pyx_L1_error)

  if (!__pyx_t_3) {

    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  } else {

    __Pyx_INCREF(__pyx_t_1);

    __pyx_t_2 = __pyx_t_1;

    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

    goto __pyx_L5_bool_binop_done;

  }

  __pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)

  __Pyx_GOTREF(__pyx_t_1);

  __Pyx_INCREF(__pyx_t_1);

  __pyx_t_2 = __pyx_t_1;

  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  __pyx_L5_bool_binop_done:;

前面都是在处理== ：__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0): 检查 y == 0，即比较 y 是否等于 0。__Pyx_PyObject_IsTrue(__pyx_t_1): 将 __pyx_t_1 转换为布尔值。如果 y == 0（即 __pyx_t_3 为 True），就直接跳到 __pyx_L5_bool_binop_done，并将 __pyx_t_1（存储 y == 0 结果）传递给下一个操作。

在执行 or 运算时，短路操作符同样会起作用：如果 y == 0 为 True，则 x == 0 的比较不会被执行，结果会直接为 True。__pyx_t_2 保存了 y == 0 或 x == 0 的结果，它将作为最终的结果传递给 print 函数。

逻辑非运算符的处理

  /* "test.py":32

 *     print(x == 0 and y == 0)

 *     print(y == 0 or x == 0)

 *     print(not x==0)             # <<<<<<<<<<<<<<

 *

 *

 */

  __pyx_t_3 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 32, __pyx_L1_error)

  __pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_3)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)

  __Pyx_GOTREF(__pyx_t_1);

  __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_print, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)

  __Pyx_GOTREF(__pyx_t_2);

  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

!__pyx_t_3：这行代码计算 not x == 0。由于 __pyx_t_3 是 x == 0 的布尔值，!__pyx_t_3 就是其逻辑取反。__Pyx_PyBool_FromLong((!__pyx_t_3))将 !__pyx_t_3 转换为 Python 的布尔对象。如果 !__pyx_t_3 为 0，则返回 False；如果 !__pyx_t_3 为 1，则返回 True。

如果以后逆向在这里出题，考察逻辑运算符，那就认命吧，这里反编译出的代码很绕。

下面粘上test3函数的反编译代码。

// write access to const memory has been detected, the output may be wrong!

// IDA pseudo-C for test3 (the error helper below is called with "test.test3").
// The listing omits local variable declarations.
// Judging by the order of the comparisons for the three print statements,
// a2 corresponds to x and a3 to y; the value loaded from
// *((_QWORD *)off_18000B688 + 35) is presumably the cached int object 0
// (TODO confirm against the generated C).
// Returns Py_None with its refcount incremented on success, 0 on error.
__int64 __fastcall sub_180001E30(__int64 a1, __int64 a2, __int64 a3)

{

  // ---- test.py line 30: print(x == 0 and y == 0) ----
  // Inlined equality test of a2 against the cached constant.
  v5 = *((_QWORD *)off_18000B688 + 35);

  // Same object as the constant: treated as equal.
  if ( a2 == v5 )

    goto LABEL_2;

  // Type pointer of a2.
  v7 = *(_QWORD *)(a2 + 8);

  if ( v7 == PyLong_Type )

  {

    // Non-zero field at offset 16 means "not equal"
    // (presumably the int's size field, zero only for the value 0 -- confirm).
    if ( *(_QWORD *)(a2 + 16) )

    {

LABEL_5:

      // Result is Py_False; its refcount is incremented.
      v6 = (_QWORD *)++Py_FalseStruct;

      goto LABEL_10;

    }

LABEL_2:

    // Result is Py_True; its refcount is incremented.
    v6 = (_QWORD *)++Py_TrueStruct;

    goto LABEL_10;

  }

  if ( v7 == PyFloat_Type )

  {

    // Float: the double stored at offset 16 is compared with 0.0.
    if ( *(double *)(a2 + 16) != 0.0 )

      goto LABEL_5;

    goto LABEL_2;

  }

  // Any other type: generic rich comparison (op 2 is Py_EQ in the CPython API).
  v6 = (_QWORD *)PyObject_RichCompare(a2, v5, 2LL);

LABEL_10:

  if ( !v6 )

  {

    // v8 is the test.py line number; v9 is presumably the line in the generated C.
    v8 = 30;

    v9 = 3136;

LABEL_75:

    // Common error exit: report the location and return 0.
    sub_180005F50("test.test3", v9, v8, (__int64)"test.py");

    return 0LL;

  }

  // Inlined truth test: True/False/None are handled by pointer comparison,
  // everything else goes through PyObject_IsTrue.
  IsTrue = v6 == (_QWORD *)Py_TrueStruct;

  v11 = v6 == (_QWORD *)Py_NoneStruct;

  v12 = IsTrue | v11 | (unsigned int)(v6 == (_QWORD *)Py_FalseStruct);

  if ( !(IsTrue | (v11 || v6 == (_QWORD *)Py_FalseStruct)) )

    IsTrue = PyObject_IsTrue(v6);

  if ( IsTrue < 0 )

  {

    v8 = 30;

    v9 = 3138;

    goto LABEL_73;

  }

  v13 = *v6;

  // Short-circuit of `and`: the first operand is falsy, so it becomes the result.
  if ( !IsTrue )

  {

    // Refcount is written back unchanged
    // (presumably a folded INCREF/DECREF pair -- confirm).
    *v6 = v13;

    v16 = v6;

    if ( v13 )

      goto LABEL_26;

    v18 = v6;

    goto LABEL_25;

  }

  // First operand is truthy: drop the reference to it ...
  v14 = v13 - 1;

  *v6 = v14;

  if ( !v14 )

    Py_Dealloc(v6);

  // ... and evaluate the second operand on a3
  // (sub_180004780 is presumably __Pyx_PyInt_EqObjC -- TODO confirm).
  v15 = (_QWORD *)sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));

  v16 = v15;

  if ( !v15 )

  {

    v8 = 30;

    v9 = 3147;

    goto LABEL_75;

  }

  v17 = *v15;

  *v16 = v17;

  if ( !v17 )

  {

    v18 = v16;

LABEL_25:

    Py_Dealloc(v18);

  }

LABEL_26:

  // v16 holds the value of the whole `and` expression.
  v6 = v16;

  // Presumably the print(...) call with v16 as its single argument
  // (sub_1800048D0 looks like __Pyx_PyObject_CallOneArg -- TODO confirm).
  v19 = (_QWORD *)sub_1800048D0(v12, v16);

  if ( !v19 )

  {

    v8 = 30;

    v9 = 3153;

    if ( !v6 )

      goto LABEL_75;

LABEL_73:

    // Error exit that first drops the reference held in v6.
    v20 = (*v6)-- == 1LL;

    if ( v20 )

      Py_Dealloc(v6);

    goto LABEL_75;

  }

  // Release the argument and the object returned by the call.
  v20 = (*v16)-- == 1LL;

  if ( v20 )

    Py_Dealloc(v16);

  v20 = (*v19)-- == 1LL;

  if ( v20 )

    Py_Dealloc(v19);

  // ---- test.py line 31: print(y == 0 or x == 0) ----
  // First operand is evaluated on a3.
  v21 = sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));

  v6 = (_QWORD *)v21;

  if ( !v21 )

  {

    v8 = 31;

    v9 = 3165;

    goto LABEL_75;

  }

  // Truth value of the first operand; negative means error
  // (sub_180006570 is presumably __Pyx_PyObject_IsTrue -- TODO confirm).
  v22 = sub_180006570(v21);

  v23 = (unsigned int)v22;

  if ( v22 < 0 )

  {

    v8 = 31;

    v9 = 3167;

    goto LABEL_73;

  }

  v24 = *v6;

  // Short-circuit of `or`: the second operand is evaluated only when the first is falsy.
  if ( !(_DWORD)v23 )

  {

    // Drop the reference to the first operand.
    v25 = v24 - 1;

    *v6 = v25;

    if ( !v25 )

      Py_Dealloc(v6);

    // Second operand is evaluated on a2.
    v26 = (_QWORD *)sub_180004780(a2, *((_QWORD *)off_18000B688 + 35));

    v6 = v26;

    if ( !v26 )

    {

      v8 = 31;

      v9 = 3176;

      goto LABEL_75;

    }

    v24 = *v26;

  }

  // v6 holds the value of the whole `or` expression; refcount written back unchanged.
  *v6 = v24;

  if ( !v24 )

    Py_Dealloc(v6);

  // Presumably the print(...) call for line 31.
  v28 = (_QWORD *)sub_1800048D0(v23, v6);

  if ( !v28 )

  {

    v8 = 31;

    v9 = 3182;

    if ( !v6 )

      goto LABEL_75;

    goto LABEL_73;

  }

  v20 = (*v6)-- == 1LL;

  if ( v20 )

    Py_Dealloc(v6);

  v20 = (*v28)-- == 1LL;

  if ( v20 )

    Py_Dealloc(v28);

  // ---- test.py line 32: print(not x==0) ----
  // Inlined equality test of a2 that yields a C int in v31:
  // 1 = equal, 0 = not equal, -1 = error.
  v29 = *((_QWORD *)off_18000B688 + 35);

  // Same object as the constant: equal, so `not` gives False.
  if ( a2 == v29 )

    goto LABEL_68;

  v30 = *(_QWORD *)(a2 + 8);

  if ( v30 == PyLong_Type )

  {

    v31 = *(_QWORD *)(a2 + 16) == 0LL;

  }

  else if ( v30 == PyFloat_Type )

  {

    if ( *(double *)(a2 + 16) == 0.0 )

      goto LABEL_68;

    v31 = 0;

  }

  else

  {

    // Generic fallback: rich comparison with Py_EQ, then the same inlined truth test as above.
    v32 = PyObject_RichCompare(a2, v29, 2LL);

    v33 = (_QWORD *)v32;

    if ( v32 )

    {

      v31 = v32 == Py_TrueStruct;

      v34 = v32 == Py_NoneStruct;

      v27 = v31 | v34 | (unsigned int)(v33 == (_QWORD *)Py_FalseStruct);

      if ( !(v31 | (v34 || v33 == (_QWORD *)Py_FalseStruct)) )

        v31 = PyObject_IsTrue(v33);

      // The comparison result object is released once its truth value is known.
      v20 = (*v33)-- == 1LL;

      if ( v20 )

        Py_Dealloc(v33);

    }

    else

    {

      v31 = -1;

    }

  }

  if ( v31 < 0 )

  {

    v8 = 32;

    v9 = 3194;

    goto LABEL_75;

  }

  // Logical not: "not equal" selects Py_True, "equal" falls through to Py_False.
  if ( !v31 )

  {

    v6 = (_QWORD *)++Py_TrueStruct;

    goto LABEL_69;

  }

LABEL_68:

  v6 = (_QWORD *)++Py_FalseStruct;

LABEL_69:

  if ( !v6 )

  {

    v8 = 32;

    v9 = 3195;

    goto LABEL_75;

  }

  // Presumably the print(...) call for line 32.
  v35 = (_QWORD *)sub_1800048D0(v27, v6);

  if ( !v35 )

  {

    v8 = 32;

    v9 = 3197;

    goto LABEL_73;

  }

  v20 = (*v6)-- == 1LL;

  if ( v20 )

    Py_Dealloc(v6);

  v20 = (*v35)-- == 1LL;

  if ( v20 )

    Py_Dealloc(v35);

  // Success: return None with its refcount incremented.
  return Py_NoneStruct++;

}