python自动化开发-[第五天]-面向过程、模块、包

今日概要：

　　1、内置模块

　　2、协程函数

　　3、递归

　　4、面向过程编程与函数编程

　　5、模块

　　6、包

　　7、re正则

一、内置模块

　　1、匿名函数lambda

　　定义：lambda表达式会创建并返回一个可以被调用的函数对象，但不会给这个函数绑定名字，因此称为匿名函数

#不使用匿名函数

def func(x, y):
    """Return ``x + y``; the ordinary named-function form of the addition."""
    total = x + y
    return total

func(1,2)

#使用匿名函数

f=lambda x,y:x+y

print(f(1,2))

　　2、max,zip(拉链函数),sorted用法

age={

'dragon':18,

'panda':20,

'banana':21,

'lili':30

}

print(max(age))

#max作用于字典时默认比较的是字典的key，key如果是字符串就按首字符比较，首字符如果一致，再依次比较后面的字符

#方法一，用zip将key和values反转

res=zip(age.values(),age.keys())

print(max(res))

def foo(k):
    """Key function for ``max``: return the value stored under ``k`` in ``age``."""
    value = age[k]
    return value

print(max(age,key=foo))

print(max(age,key=lambda k:age[k]))

print(min(age,key=lambda k:age[k]))

print(sorted(age)) #默认的排序结果是从小到大

print(sorted(age,key=lambda x:age[x])) #默认的排序结果是从小到大

print(sorted(age,key=lambda x:age[x],reverse=True)) #默认的排序结果是从小到大，reverse=True反转为从大到小

　　3、map,reduce,filter

　　　　1、map

#map,将lambda函数对应的关系映射到列表里

l=['dragon','banana','water']

res=map(lambda x:x+'_guess',l)

print(res)  #返回值为迭代器

print (list(res))

　　　 2、reduce

#1、默认不传初始值时，取列表的第一个元素作为初始值，然后依次取出第二个元素直到最后一个元素，与累计结果做运算，最终得出最后的值

#2、传入初始值（第三个参数）后，就以该参数作为初始值，再从列表的第一个元素开始依次运算

from functools import reduce

l=[1,2,3,4,5]

print(reduce(lambda x,y:x+y,l,10))

　　　　3、filter

#filter过滤规则，将匹配的元素提取出来

l=['test123','goto123','run123','pass']

res=filter(lambda x:x.endswith('123'),l)

print(list(res))

二、协程函数之一(yield和send)

　　python由于GIL的原因，导致其线程无法发挥多核的并行计算能力（后来有了multiprocessing，可以实现多进程并行），显得比较鸡肋。既然在GIL之下，同一时刻只能有一个线程在运行，那么对于CPU密集的程序来说，线程之间的切换开销就成了拖累，而以I/O为瓶颈的程序正是协程所擅长的：多任务并发（非并行），每个任务在合适的时候挂起（发起I/O）和恢复(I/O结束)

　　生成器除了next方法，还有send方法，生成器的初始化可以用next(g)或者g.send(None)

　　yield后面可以跟一个返回值，这个值会作为next(g)或g.send(...)的返回结果，例如res=g.send('test')，再print(res)即可打印yield的返回值（对有限的生成器也可以用list(g)取出剩余的全部yield值）

#!/usr/bin/python

# -*- coding:utf-8 -*-

#例子：

def f1(func):
    """Decorator that starts a generator so it is ready for ``send``.

    The returned wrapper calls ``func`` to create the generator, advances it
    to its first ``yield`` with ``next`` and hands the generator back.

    :param func: generator function to wrap.
    :return: wrapper that returns an already-started generator.
    """
    # Function-scope import keeps this snippet usable on its own.
    from functools import wraps

    @wraps(func)  # keep the wrapped function's __name__ and __doc__
    def f2(*args,**kwargs):
        g = func(*args,**kwargs)
        next(g)  # run up to the first yield; a fresh generator rejects non-None send()
        return g
    return f2

@f1
def eater(name):
    """Coroutine that prints every food item sent to it.

    Prints a ready message once, then loops forever: it yields ``food_list``
    and, for each value passed in through ``send``, prints ``name`` together
    with that value.

    :param name: name printed in front of every food item.
    """
    print ('%s ready to eat' %(name))
    # This line was indented with full-width spaces, which Python rejects;
    # it now uses the same four ASCII spaces as the rest of the body.
    food_list = []
    while True:
        # NOTE(review): food_list is never appended to, so every send() gets an
        # empty list back -- presumably food should be recorded here; confirm.
        food = yield food_list
        print ('%s,%s' %(name,food))

g = eater('alex')

g.send('jjj')

三、递归

　　定义：在函数调用过程中，直接或间接地调用了函数本身，这就是函数的递归调用

　　notice:python默认最大递归数为1000，sys.getrecursionlimit()查看

l = list(range(1000))

def to(info,g):
    """Recursively binary-search ``info`` for ``g`` and print the progress.

    At each step the half that will be searched next is printed. The search
    ends by printing the matching element, or ``not exist`` once the remaining
    range is empty.

    :param info: sequence to search; the comparisons assume ascending order.
    :param g: value to look for.
    :return: None; the outcome is reported only through ``print``.
    """
    if len(info) == 0:
        print ('not exist')
        return
    mid = len(info) // 2  # floor division gives the int index directly
    if info[mid] > g:
        # Middle element is too large: continue in the left half.
        print (info[0:mid])
        to(info[0:mid],g)
    elif info[mid] < g:
        # Middle element is too small: continue in the right half.
        print (info[mid+1:])
        to(info[mid+1:],g)
    elif info[mid] == g:
        print (info[mid])

to(l,25)

四、面向过程

　　定义：面向过程程序设计：是一种流水线式的编程思路，是机械式的

　　优点：程序结构清晰，可以把复杂的问题简单化

　　缺点：扩展性差

　　适用场景：

　　　　　　git程序，httpd服务，linux内核

import os

#1、定义一个初始化yield的装饰器

def init(func):
    """Decorator that primes a generator-based pipeline stage.

    The wrapper creates the generator by calling ``func``, advances it once
    with ``next`` so it is paused at its first ``yield``, and returns it.

    :param func: generator function implementing one pipeline stage.
    :return: wrapper that returns the already-started generator.
    """
    # Function-scope import keeps this snippet usable on its own.
    from functools import wraps

    @wraps(func)  # keep the stage's own __name__ and __doc__ on the wrapper
    def wrapper(*args,**kwargs):
        res=func(*args,**kwargs)
        next(res)  # advance to the first yield so send() can be used right away
        return res
    return wrapper

#第一步先遍历目录

@init
def search(target):
    """Pipeline stage 1: walk each directory sent in and forward every file path.

    Every value received through ``send`` is handed to ``os.walk``; the path
    of each file found is sent on to ``target``.

    :param target: downstream coroutine that receives one file path per ``send``.
    """
    while True:
        search_path=yield
        for par_dir,_,files in os.walk(search_path):
            for file in files:
                # os.path.join uses the running platform's separator instead
                # of a hard-coded Windows backslash.
                target.send(os.path.join(par_dir,file))

#第二步打开文件

@init
def opener(target):
    """Pipeline stage 2: open each received path and pass the file downstream.

    The file is opened as UTF-8 text and sent to ``target`` as a
    ``(path, file object)`` pair while it is still open.

    :param target: downstream coroutine that receives the pair.
    """
    while True:
        path = yield
        with open(path, encoding='utf-8') as handle:
            payload = (path, handle)
            target.send(payload)

#第三步读文件里的行

@init
def cat(target):
    """Pipeline stage 3: feed the lines of each received file downstream.

    Receives ``(path, file object)`` pairs and sends ``(path, line)`` to
    ``target`` for every line; it stops reading the current file as soon as
    ``target.send`` returns a truthy value.

    :param target: downstream coroutine that receives ``(path, line)`` pairs.
    """
    while True:
        path, handle = yield
        for text in handle:
            if target.send((path, text)):
                break

#第四步进行grep过滤，如果第一次存在，则跳过该文件，进入下一次文件判断

@init
def grep(target,pattern):
    """Pipeline stage 4: forward the path of any line that contains ``pattern``.

    Receives ``(path, line)`` pairs. The value yielded back to the sender is
    ``True`` when the previous line contained ``pattern`` and ``False``
    otherwise, which lets the sender skip the rest of that file.

    :param target: downstream coroutine that receives matching file paths.
    :param pattern: substring looked for in each line.
    """
    matched = False
    while True:
        path, text = yield matched
        matched = pattern in text
        if matched:
            target.send(path)

#第五步，打印匹配内容的文件名

@init
def printer():
    """Pipeline stage 5: print every value sent to this coroutine."""
    while True:
        received = yield
        print(received)

x=r'路径'

g=search(opener(cat(grep(printer(),'python'))))

print(g)

g.send(x)

五、模块

　　　1、import导入模块

　　　　　　产生新的名称空间，以新建的名称空间为全局名称空间，执行文件的代码，拿到一个模块名，指向模块名.py产生的名称空间

　　　2、from ... import ...

　　　　　　产生新的名称空间，以新建的名称空间为全局名称空间，执行文件的代码，直接拿到就是模块名.py产生的名称空间中名字

　　　　优点：方便不用添加前缀

　　　　缺点：容易跟当前文件的名称空间冲突

　　　3、模块的搜索顺序：

　　　　　　内存---->内置---->sys.path

　　　4、sys.path.append()方法可以添加路径到sys.path里

　　　5、notice，导入模块相当于执行该模块文件代码

　　　6、from .. import * 和 __all__是对应关系，控制import *导入的内容

例子：

　spam.py文件　

#spam.py

# Executed whenever this file is run or imported, so the import itself prints a line.
print('from the spam.py')

# Limits what `from spam import *` exports to the single name `money`.
__all__=['money']

# Module-level value that read1() prints and change() resets.
money=1000

def read1():

    # Print the current module-level money.
    print('spam->read1->money',money)

def read2():

    # Announce the call, then delegate to read1().
    print('spam->read2 calling read')

    read1()

def change():

    # Rebind the module-level money (not a local variable) to 0.
    global money

    money=0

# When spam.py is run as a script, __name__ == '__main__'

# When spam.py is imported as a module, __name__ is the module name

# print('当前文件的用途是: ',__name__)

# Only runs when the file is executed directly, not when it is imported.
if __name__ == '__main__':

    print('当做脚本执行')

    change()

    print(money)

执行文件：

#会执行文件，因为spam里有print,所以会直接打印

import spam

#模块的别名,容易和存在文件冲突

import spam as read

print (read.money)

from spam import read1

read1()

#由于spam.py中的__all__只包含money，from spam import *只会导入money，不会导入read1（下面的read1()可用是因为前面已经from spam import read1）

from spam import *

print (money)

read1()

六、包

　　定义：python程序由包(package)、模块(module)和函数组成。包是由一系列模块组成的集合。模块是处理某一类问题的函数和类的集合、包就是一个完成特定任务的工具箱，包必须含有一个__init__.py文件，它用于标识当前文件夹是一个包。python的程序是由一个个模块组成的。模块把一组相关的函数或代码组织到一个文件中，一个文件即是一个模块。模块由代码、函数和类组成。导入模块使用import语句、包的作用是实现程序的重用。

　　例子：

glance/                   顶级包

├── __init__.py

├── api                  

│   ├── __init__.py

│   ├── policy.py

│   └── versions.py

├── cmd               

│   ├── __init__.py

│   └── manage.py

└── db                  

    ├── __init__.py

    └── models.py

各个包下面的文件内容：

#文件内容

#policy.py

def get():
    """Print a marker line showing that policy.py's ``get`` was called."""
    marker = 'from policy.py'
    print(marker)

#versions.py

def create_resource(conf):
    """Print a marker line for versions.py followed by ``conf``.

    The marker now names ``versions.py``, matching the file this function
    lives in (it previously said ``version.py``).

    :param conf: value printed after the marker.
    """
    print('from versions.py: ',conf)

#manage.py

def main():
    """Print a marker line showing that manage.py's ``main`` was called."""
    marker = 'from manage.py'
    print(marker)

#models.py

def register_models(engine):
    """Print a marker line for models.py followed by ``engine``.

    :param engine: value printed after the marker.
    """
    label = 'from models.py: '
    print(label, engine)

　　1、凡是在导入时带点的，点的左边都必须是一个包

　　 2、from a import so.sys是错误语法,import 后不允许加.

　　 3、包含__init__.py文件才为包

　　 4、绝对导入和相对导入

　　　　绝对导入：

　　　　　　以glance作为起始(顶级包)

　　　　相对导入：

　　　　　　用.或者..的方式作为起始（只能在一个包中使用，不能用于不同目录内）

例子：

在glance/api/versions.py

#绝对导入

from glance.cmd import manage

manage.main()

#相对导入

from ..cmd import manage

manage.main()

　　5、可以用import导入内置或者第三方模块（已经在sys.path中），但是要绝对避免使用import来导入自定义包的子模块(没有在sys.path中)，应该使用from... import ...的绝对或者相对导入,且包的相对导入只能用from的形式。

　　6、单独导入包

　　　　单独导入包名称时，不会导入包中包含的所有子模块

#在与glance同级的test.py中

import glance

glance.cmd.manage.main()

'''

执行结果：

AttributeError: module 'glance' has no attribute 'cmd'

'''

#正确解决方法

#在glance的__init__.py导入

from . import cmd

七、正则模块

　　定义：正则就是用一些具有特殊含义的符号组合到一起（称为正则表达式）来描述字符或者字符串的方法。或者说：正则就是用来描述一类事物的规则。（在Python中）它内嵌在Python中，并通过 re 模块实现。正则表达式模式被编译成一系列的字节码，然后由用 C 编写的匹配引擎执行。

　　1、re.findall

　　　　findall(pattern, string, flags=0)

　　　　匹配所有正则规则放入到一个列表里

　　2、re.search

　　　　search(pattern, string, flags=0)

　　　　扫描字符串，直到找到第一个匹配，然后返回一个包含匹配信息的对象

　　3、re.match

　　　　Match a regular expression pattern to the beginning of a string.

　　　　在字符串开始处进行匹配,完全可以用search+^代替match

　　4、re.compile

　　　　compile(pattern, flags=0)
　　　　生成一个正则对象

　　5、re.split

　　　　split(pattern, string, maxsplit=0, flags=0)

　　　　以正则进行分割

　　6、re.sub

　　　　sub(pattern, repl, string, count=0, flags=0)

　　　　替换，不指定count替换所有，指定count表示替换几个匹配值

import re

# Character-class demos. The patterns are raw strings so that \w, \s, \d and
# their uppercase forms reach the regex engine untouched; in a normal string
# literal they are invalid escape sequences and newer Python versions warn
# about them. The subject strings stay non-raw because their \n and \t must
# be real newline and tab characters.

# \w matches a letter, digit or underscore
print(re.findall(r'\w','as213df_*|'))
# Output:
# ['a', 's', '2', '1', '3', 'd', 'f', '_']

# \W matches anything that is not a letter, digit or underscore
print(re.findall(r'\W','as213df_*|'))
# Output:
# ['*', '|']

# \s matches any whitespace character
print(re.findall(r'\s','a b\nc\td'))
# Output:
# [' ', '\n', '\t']

# \S matches any non-whitespace character
print(re.findall(r'\S','a b\nc\td'))
# Output:
# ['a', 'b', 'c', 'd']

# \d matches any decimal digit
print(re.findall(r'\d','a123bcdef'))
# Output:
# ['1', '2', '3']

# \D matches any character that is not a decimal digit
print(re.findall(r'\D','a123bcdef'))
# Output:
# ['a', 'b', 'c', 'd', 'e', 'f']

#匹配换行符

print(re.findall('\n','a123\nbcdef'))

#输出匹配内容

['\n']

#匹配制表符

print(re.findall('\t','a123\tbc\td\tef'))

#输出匹配内容

['\t', '\t', '\t']

#^匹配字符串的开头

print(re.findall('^d','drango hao123'))

#输出匹配内容

['d']

#$匹配字符串结尾

print(re.findall('3$','e3ll3o e3ggogo hao123'))

#输出匹配内容

['3']

#.匹配任意字符，除了\n换行符，出现\n换行符可用re.S

print(re.findall('a.c','abc a1c a*c a|c abd aed ac'))

print(re.findall('a.c','abc a1c a*c a|c abd aed a\nc',re.S)) #让点能够匹配到换行符

#输出匹配内容

['abc', 'a1c', 'a*c', 'a|c']

['abc', 'a1c', 'a*c', 'a|c', 'a\nc']

#[]表示一组字符，单独列出，[]出现^为取反

print(re.findall('a[1,2\n]c','a2c a,c abc a1c a*c a|c abd aed a\nc'))

print(re.findall('a[0-9]c','a2c a,c abc a1c a*c a|c abd aed a\nc'))

print(re.findall('a[0-9a-zA-Z*-]c','a1c abc a*c a-c aEc'))

#输出匹配内容

['a2c', 'a,c', 'a1c', 'a\nc']

['a2c', 'a1c']

['a1c', 'abc', 'a*c', 'a-c', 'aEc']

#*为0个或多个表达式

print(re.findall('ab*','a'))

print(re.findall('ab*','abbbbbb'))

print(re.findall('ab*','bbbbbb'))

#+为1个或多个表达式

print(re.findall('ab+','a'))

print(re.findall('ab+','abbbbbb'))

print(re.findall('ab+','bbbbbb'))

#{m}为精确匹配m次前面的表达式

print(re.findall('ab{3}','ab1 abbbbbbbb2 abbbbb3 ab4 ab122'))

#{m,n}为匹配m到n次前面的正则表达式定义的片段，贪婪方式

print(re.findall('ab{3,4}','ab1 abbb123 abbbb123 abbbbbt'))

print(re.findall('ab{3,}','ab1 abbb123 abbbb123 abbbbbt'))

print(re.findall('ab{0,}','a123123123 ab1 abbb123 abbbb123 abbbbbt'))

#?为匹配0个或者1个前面的表达式（本身是贪婪的；只有跟在*、+、?、{m,n}之后时才表示非贪婪模式）

print(re.findall('ab?c','ac abc aec a1c'))

#.* 贪婪匹配

print(re.findall('a.*c','ac abc aec a1c'))

#.*? 非贪婪匹配

print(re.findall('a.*?c','ac abc aec a1c'))

print(re.findall('a.*?c','ac abc a111111111c a\nc a1c',re.S))

#正则中有分组()时，findall默认只返回分组匹配到的内容，可用(?:...)非捕获分组让结果返回完整的匹配内容

print(re.findall('compan(?:y|ies)',

                 'Too many companies have gone bankrupt, and the next one is my company'))

print(re.findall('ab+123','ababab123'))

print(re.findall('(?:ab)+123','ababab123'))

# Matching a literal backslash. The subject is written r'a\c' because '\c' is
# not a valid escape sequence in a normal string literal.

# The r prefix makes a raw string: Python leaves the backslashes alone, so the
# regex engine receives a\\c, i.e. "a", an escaped backslash, "c".
print(re.findall(r'a\\c',r'a\c'))

# Without the r prefix each backslash must be doubled again for Python's own
# string escaping, so four backslashes are needed to match one.
print(re.findall('a\\\\c',r'a\c'))

print(re.findall(r'a\\c',r'a\c')) # raw-string form again

print(re.findall('a\\\\c',r'a\c')) # same pattern as the raw-string form; every call prints ['a\\c']

　　?:结果匹配全部内容

　　>>> print(re.findall('(?:ab)+123','ababab123'))
　　　　['ababab123']
　　>>> print(re.findall('(ab)+123','ababab123'))
　　　　['ab']

　　7、关于正则的总结

　　　　使用()得到匹配目标，用group(n)去取得结果

　　　　尽量使用非贪婪模式：.*?

　　　　尽量使用泛匹配模式.*

　　　　遇到换行符就用re.S，修改模式