Python文本和字符串常用操作

## 字符串分割

 import re

 # A single, fixed separator: str.split() is all that is needed.
 line = "This is my love!"
 fields = line.split(' ')
 print(fields)
 # ['This', 'is', 'my', 'love!']

 # Several different delimiters: split with a regular expression instead.
 line = "asd dfwerf, sdfs; jtyy. werwe, sdfsd"

 # A capturing group (...) keeps every matched delimiter in the result list.
 keep_delims = re.compile(r"(,|\.|;|\s)\s*")
 fields = keep_delims.split(line)
 print(fields)
 # ['asd', ' ', 'dfwerf', ',', 'sdfs', ';', 'jtyy', '.', 'werwe', ',', 'sdfsd']

 # A non-capturing group (?:...) matches the same delimiters but drops them.
 drop_delims = re.compile(r"(?:,|\.|;|\s)\s*")
 fields = drop_delims.split(line)
 print(fields)
 # ['asd', 'dfwerf', 'sdfs', 'jtyy', 'werwe', 'sdfsd']

## 开头或结尾匹配startswith(), endswith()

 import os

 url = 'https://www.baidu.com'
 print(url.startswith('https'))
 # True

 words = ['sd', 'asad', 'fgwer', 'jtdwse', 'qwieu', 'pqwej', 'ejwqi', 'iaweq']

 # startswith() / endswith() accept a tuple of candidates to test several
 # prefixes/suffixes at once. The argument must really be a tuple, so the
 # inner parentheses cannot be omitted.
 w = [word for word in words if word.startswith(('a', 'e', 'i'))]
 print(w)
 # ['asad', 'ejwqi', 'iaweq']

 # Check whether a directory contains files of certain types.
 # The current directory is used here only so that the example is runnable.
 dirname = '.'
 if any(name.endswith(('.c', '.h')) for name in os.listdir(dirname)):
     pass

## Shell通配符(*?[0-9])匹配

 from fnmatch import fnmatch, fnmatchcase

 # fnmatch() understands the shell wildcards *, ? and [seq].
 cases = (
     ('hello.py', '*.py'),              # True
     ('hello.py', '?ello.py'),          # True
     ('hello-1.py', 'hello-[0-9].py'),  # True
     ('hello.txt', '*.TXT'),            # depends on the operating system, see below
 )
 for filename, pattern in cases:
     print(fnmatch(filename, pattern))

 # fnmatch() follows the case rules of the underlying operating system:
 # the last call prints False on UNIX-like systems (case-sensitive) and
 # True on Windows (case-insensitive).

 # fnmatchcase() is case-sensitive on every operating system.
 print(fnmatchcase('hello.txt', '*.TXT'))
 # False

## 字符串匹配和搜索

 import re

 date = '7/24/2018'
 line = "Today is 7/24/2018, tomorrow is 7/25/2018"

 # Matching: re.match() returns a match object on success and None otherwise.
 print(re.match(r'\d+/\d+/\d+', date) is not None)
 # True

 # A pattern that is used repeatedly can be compiled once up front.
 date_pat = re.compile(r'\d+/\d+/\d+')
 print(date_pat.match(date) is not None)
 # True

 # Find and capture: each (...) group becomes one element of the result tuples.
 date_pat = re.compile(r'(\d+)/(\d+)/(\d+)')
 print(date_pat.findall(line))
 # [('7', '24', '2018'), ('7', '25', '2018')]

 m = date_pat.match(date)
 print(m)
 # Prints the repr of the match object (exact text depends on the Python version).

 # group() with no argument is the whole match, same as group(0).
 print(m.group())
 # 7/24/2018

 # group(1), group(2), group(3) are the captured month, day and year.
 for index in (1, 2, 3):
     print(m.group(index))
 # 7
 # 24
 # 2018

 print(m.group(0))
 # 7/24/2018

## 字符串的修改和替换

 import re
 from calendar import month_abbr

 # Simple literal replacement: str.replace().
 line = "Yes, it's me!"
 print(line.replace("Yes", "Yeah"))
 # Yeah, it's me!

 text = "Today is 7/24/2018, tomorrow is 7/25/2018"
 date_pat = re.compile(r'(\d+)/(\d+)/(\d+)')

 # In the replacement template, \1 \2 \3 refer to the captured groups by
 # number, exactly like m.group(1), m.group(2), m.group(3).
 print(date_pat.sub(r'\3-\1-\2', text))
 # Today is 2018-7-24, tomorrow is 2018-7-25


 # sub() also accepts a function as the replacement for more complex rewrites.
 def change_date(m):
     """Return the replacement text 'D Mon Y' for one M/D/Y match.

     m is a match object (the kind returned by match() or search()) whose
     groups 1, 2 and 3 hold the month, day and year.
     """
     month, day, year = m.group(1, 2, 3)
     return " ".join((day, month_abbr[int(month)], year))


 print(date_pat.sub(change_date, text))
 # Today is 24 Jul 2018, tomorrow is 25 Jul 2018

 # subn() returns a tuple: (new string, number of replacements made).
 print(date_pat.subn(change_date, text))
 # ('Today is 24 Jul 2018, tomorrow is 25 Jul 2018', 2)

## 搜索忽略大小写

 import re

 text = "UPPER PYTHON, lower python, Mixed Python"

 print(re.findall('python', text, flags=re.IGNORECASE))
 # ['PYTHON', 'python', 'Python']

 # A plain replacement string does not follow the case of each match:
 # every match is replaced with the same text.
 print(re.sub('python', 'java', text, flags=re.IGNORECASE))
 # UPPER java, lower java, Mixed java


 # A helper function can make up for that.
 def matchcase(word):
     """Return a re.sub() callback that inserts word in the case style of each match.

     The callback receives a match object and returns:
       * word.upper() when the matched text is all upper case,
       * word.lower() when it is all lower case,
       * word.title() when its first character is upper case,
       * word unchanged otherwise (including an empty match).
     """
     def replace(m):
         matched = m.group()
         if matched.isupper():
             return word.upper()
         if matched.islower():
             return word.lower()
         # Slice instead of indexing: a pattern that can match the empty
         # string would otherwise raise IndexError on matched[0].
         if matched[:1].isupper():
             return word.title()
         return word

     return replace


 print(re.sub('python', matchcase('java'), text, flags=re.IGNORECASE))
 # UPPER JAVA, lower java, Mixed Java

## 贪婪匹配和最短匹配

 import re

 text = 'You said "Yes", I said "No"'

 # Greedy: .* grabs as much as possible, so the match runs from the first
 # quote to the last one.
 greedy = re.compile(r'\".*\"')
 print(greedy.findall(text))
 # ['"Yes", I said "No"']

 # Non-greedy: .*? grabs as little as possible, so each quoted word is
 # matched on its own.
 shortest = re.compile(r'\".*?\"')
 print(shortest.findall(text))
 # ['"Yes"', '"No"']

## 多行匹配

 import re

 text1 = "/* this is a comment */"

 # Written with explicit \n escapes so the exact whitespace inside the
 # comment (a newline followed by four spaces) is unambiguous and matches
 # the outputs shown below.
 text2 = "\n    /* this is a\n    multiline comment */\n"

 # This pattern cannot match across lines, because . does not match a newline.
 comment = re.compile(r'/\*(.*?)\*/')
 print(comment.findall(text1))
 # [' this is a comment ']
 print(comment.findall(text2))
 # []

 # Adding \n to the pattern as an alternative to . lets it span several lines.
 comment = re.compile(r'/\*((?:.|\n)*?)\*/')
 print(comment.findall(text1))
 # [' this is a comment ']
 print(comment.findall(text2))
 # [' this is a\n    multiline comment ']

 # flags=re.DOTALL makes . match every character, including a newline.
 comment = re.compile(r'/\*(.*?)\*/', flags=re.DOTALL)
 print(comment.findall(text1))
 # [' this is a comment ']
 print(comment.findall(text2))
 # [' this is a\n    multiline comment ']

## 删除多余字符

 import re

 text = "---Hello   World+++"

 # strip() removes the given characters from both ends only; characters in
 # the middle of the string are never touched. With no argument it strips
 # whitespace.
 print(text.strip("-+"))
 # Hello   World

 # lstrip() trims the left end only.
 print(text.lstrip("-"))
 # Hello   World+++

 # rstrip() trims the right end only.
 print(text.rstrip("+"))
 # ---Hello   World

 # replace() can only remove/replace a fixed number of characters.
 text = "Hello  World"
 print(text.replace('  ', ' '))
 # Hello World

 # A regular expression handles runs of any length. The pattern is a raw
 # string so that \s reaches the re module instead of being treated as an
 # (invalid) string escape.
 text = "Hello     World"
 print(re.sub(r'\s+', ' ', text))
 # Hello World

## 字符串对齐

 text = "Hello World"

 # ljust / rjust / center: the first argument is the total width, the
 # optional second argument is the fill character (a space by default).
 print(text.ljust(20, '-'))
 # Hello World---------
 print(text.rjust(20))
 #          Hello World
 print(text.center(20, '-'))
 # ----Hello World-----

 # format() does the same through a format spec: [fill]align width, where
 # align is < (left), > (right) or ^ (centered).
 for spec in ('>20', '-<20', '+^20'):
     print(format(text, spec))
 #          Hello World
 # Hello World---------
 # ++++Hello World+++++

 # The same spec syntax also formats numbers.
 print(format(3.14159265, '-^10.4f'))
 # --3.1416--

## 字符串中插入变量

 text = 'Hello, {name}!'
 print(text.format(name="Stanley"))
 # Hello, Stanley!

 text = '{name} has {n} message(s).'
 name = "Stanley"
 n = 32

 # vars() with no argument returns the current namespace, so the
 # placeholders are looked up among the variables defined here.
 print(text.format_map(vars()))
 # Stanley has 32 message(s).


 class UserInfo:
     """Minimal record with the two attributes used by the template."""

     def __init__(self, name, n):
         self.name = name
         self.n = n


 a = UserInfo('Stanley', 30)

 # vars(obj) returns the instance's attribute dictionary.
 print(text.format_map(vars(a)))
 # Stanley has 30 message(s).


 # Wrap the input in a dict subclass to avoid failing when a variable is missing.
 class safesub(dict):
     """dict that leaves unknown placeholders in the text instead of raising KeyError."""

     def __missing__(self, key):
         # Called by dict lookup when key is absent: put the placeholder back.
         return "{" + key + "}"


 text = '{name} has {n} message(s).'
 name = "Stanley"

 # n is still defined from the example above; delete it so that the
 # placeholder is really missing and __missing__() gets exercised.
 del n
 print(text.format_map(safesub(vars())))
 # Stanley has {n} message(s).

## 文本换行

import textwrap

text = "Python is an interpreted high-level programming language for general-purpose programming. Created by Guido van Rossum and first released in 1991, Python has a design philosophy that emphasizes code readability, notably using significant whitespace. It provides constructs that enable clear programming on both small and large scales. In July 2018, the creator Guido Rossum stepped down as the leader in the language community after 30 years."

# Six spaces of indentation. Spelled as a product so the width stays
# unambiguous; the line breaks shown below depend on this exact width.
INDENT = " " * 6

# Wrap to at most 40 columns per line.
print(textwrap.fill(text, 40))
# Python is an interpreted high-level
# programming language for general-purpose
# programming. Created by Guido van Rossum
# and first released in 1991, Python has a
# design philosophy that emphasizes code
# readability, notably using significant
# whitespace. It provides constructs that
# enable clear programming on both small
# and large scales. In July 2018, the
# creator Guido Rossum stepped down as the
# leader in the language community after
# 30 years.

# First-line indent: only the first output line is prefixed.
print(textwrap.fill(text, 80, initial_indent=INDENT))
#       Python is an interpreted high-level programming language for general-
# purpose programming. Created by Guido van Rossum and first released in 1991,
# Python has a design philosophy that emphasizes code readability, notably using
# significant whitespace. It provides constructs that enable clear programming on
# both small and large scales. In July 2018, the creator Guido Rossum stepped down
# as the leader in the language community after 30 years.

# Hanging indent: every line except the first is prefixed.
print(textwrap.fill(text, 80, subsequent_indent=INDENT))
# Python is an interpreted high-level programming language for general-purpose
#       programming. Created by Guido van Rossum and first released in 1991,
#       Python has a design philosophy that emphasizes code readability, notably
#       using significant whitespace. It provides constructs that enable clear
#       programming on both small and large scales. In July 2018, the creator
#       Guido Rossum stepped down as the leader in the language community after 30
#       years.

参考资料：
　　Python Cookbook, 3rd edition, by David Beazley and Brian K. Jones (O’Reilly).

Python文本和字符串常用操作的更多相关文章

Python 基礎 - 字符串常用操作
字符串常用操作今天就介紹一下常用的字符串操作,都是以 Python3撰寫的首字母變大寫 #!/usr/bin/env python3 # -*- coding:utf-8 -*- name = & ...
python基础之字符串常用操作总结
字符串的索引 s = 'ABCDLSESRF' # 索引这两个很简单没什么说的 s1 = s[0] print(s1) # A s2 = s[2] print(s2) # C 切片 s = 'ABC ...
初识python：字符串常用操作
直接上代码示例: #!/user/bin env python # author:Simple-Sir # time:20180914 # 字符串常用操作 name = 'lzh lyh' print ...
Python学习笔记五：字符串常用操作，字典，三级菜单实例
字符串常用操作 7月19日,7月20日 ,7月22日,7月29日,8月29日,2月29日首字母大写:a_str.capitalize() 统计字符串个数:a_str.count(“x”) 输出字符, ...
javascript中字符串常用操作整理
javascript中字符串常用操作整理字符串的操作在js中非常频繁,也非常重要.以往看完书之后都能记得非常清楚,但稍微隔一段时间不用,便会忘得差不多,记性不好是硬伤啊...今天就对字符串的一些常用 ...
文本处理sed常用操作
文本处理sed常用操作 linux sed (stream editor) is a Unix utility that parses and transforms text, using a sim ...
python基础（字符串常用、数字类型转换、基本运算符与流程控制）
一.字符串常用操作: #! /usr/bin/env python # -*- coding: utf-8 -*- # __author__ = "Z'N'Y" # Date: 2 ...
Python3笔记022 - 5.1 字符串常用操作
第5章字符串及正则表达式 5.1 字符串常用操作 5.1.1 拼接字符串使用+运算符可完成多个字符串的拼接,产生一个新的字符串对象. str1 = "2020年07月06日是" ...
python文本去掉字符串前后空格
python文本去掉字符串前后空格场景: 去掉字符串前后空格可以使用strip,lstrip,rstrip方法 >>> a="abc".center (30 ...

随机推荐

电脑护眼小软件f.lux
f.lux这软件用了能不能保护好视力不好说,反正我是用了以后这么多年一直都在用,狠不下心删去.至少安装后能让心里多一些安全感! 以前老控制不住长期坐在电脑前不动,太需要有这类软件来养护.用了没太明显的 ...
Nginx 性能参数优化
user www www; # ginx要开启的进程数一般等于cpu的总核数,没必要开那么多,1个nginx内存消耗10兆左右 worker_processes 4; # 为每个进程分配cpu,上例 ...
linux crontab 的使用
linux crontab 的使用准备(实验楼需要,实际环境不需要):sudo service rsyslog startsudo cron -f & crontab 使用添加任务:cron ...
C#中的多线程 - 高级多线程 z
原文:http://www.albahari.com/threading/part4.aspx 专题:C#中的多线程 1非阻塞同步Permalink 之前,我们描述了即使是很简单的赋值或更新一个字段也 ...
mysqldump导出sql文件中insert多行问题
mysqldump为了加快导入导出,默认把数据都缩减在一行里面. 查看和修改不方便,为此,我们可以使用--skip-extended-insert选项来使导出的数据,是多行插入形式的. mysqldu ...
Python初学者第五天列表及简单操作
5day 数据类型:列表 1.创建列表 user = ['aa','14',1,10,'aa',1,2,3,3,5,9] n = [] list() m = list() 2.查询 a.按索引查询 b ...
Http status（二）
http含义: http 200:-文件被正常的访问 http 302:临时重定向 HTTP错误列表 HTTP 400 - 请求无效 HTTP 401.1 - 未授权:登录失败 HTTP 401.2 ...
QA-IDEA中用maven配置项目无法加载JDBC
java.lang.ClassNotFoundException: com.mysql.jdbc.Driver Im building Maven Java Web application and w ...
ZT C,C++表达式求值顺序裘老的解释。 [问题点数：300分]
http://bbs.csdn.net/topics/370153775 [置顶] [推荐] C,C++表达式求值顺序裘老的解释. [问题点数:300分] 最近这问题有从日经变时经的趋势,这里贴出裘 ...
关于TCHAR和string对象的c.str()一些注意事项
1.TCHAR 根据预处理器的设置,如果是_MBCS, 那么TCHAR = char: 如果如果设置的是UNICODE和_UNICODE,那么TCHAR=wchar_t.就等于根据当前环境会选择不同 ...

Python文本和字符串常用操作

Python文本和字符串常用操作的更多相关文章

随机推荐

热门专题