python3

2024-07-24 约 6783 字预计阅读 14 分钟次阅读

基础

注释

1
2
3
4
5
6
7
8
9


# 注释1
 
'''
注释2
'''

"""
注释3
"""

多行语句

1
2
3
4
5
6
7


total = item_one + \
        item_two + \
        item_three

#  [], {}, 或 () 中的多行语句不用反斜杠
total = ['item_one', 'item_two', 'item_three',
        'item_four', 'item_five']

一行执行多条语句

1

a = 10; print(a)

数值类型：int (整数)、bool (布尔)、float (浮点数)、complex (复数)

1

round(5.2) # 四舍五入（注意：Python 3 对恰好 .5 的情况取最近的偶数，如 round(2.5) 得 2）

字符串：单引号、双引号、三引号（连接符号+）

转义符：\（r 原始字符串不发生转义）

多次执行：“123” * 3

1
2
3
4
5
6
7
8


text = '123456789'

print(text)        # 输出字符串 123456789
print(text[0:-1])  # 输出第一个到倒数第二个 12345678
print(text[0:3])   # 输出第一个到第四个（不包括四） 123
print(text[0])     # 输出第一个 1
print((text[2:]))  # 输出第三个到最后 3456789
print(text[1:4:2]) # 输出第二个到第五个，步长二 24

标准数据类型

**不可变数据（3 个）：**Number（数字）、String（字符串）、Tuple（元组）；
**可变数据（3 个）：**List（列表）、Dictionary（字典）、Set（集合）。
其他：bool（布尔类型）、bytes（字节数组）

判断数据类型

1
2


print(type(1.2))  # <class 'float'>
print(isinstance(123, int)) # True

等待用户输入：

1

input("\ninput:")

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


# 同一行输出
print(11, end="")
print(22, end="")

# 不同行输出
print(11)
print(22)

print('{0} 和 {1}'.format('Google', 'Runoob'))
print('{}网址： "{}!"'.format('菜鸟教程', 'www.runoob.com'))
print('{name}网址： {site}'.format(name='菜鸟教程', site='www.runoob.com'))

print(f"xxx {a} xxx")
print("xxx%sxxx%d" % ('小明', 10))

导入

1
2
3
4
5
6
7


# 导入模块
import os
cmd = os.system("id")

# 导入模块中的成员
from os import system
cmd = system("id")

多个变量赋值

1

a, b, c = 1, 2, "runoob"

数值运算

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


>>> 5 + 4  # 加法
9
>>> 4.3 - 2 # 减法
2.3
>>> 3 * 7  # 乘法
21
>>> 2 / 4  # 除法，得到一个浮点数
0.5
>>> 2 // 4 # 除法，得到一个整数
0
>>> 17 % 3 # 取余
2
>>> 2 ** 5 # 乘方
32

List列表（可修改）

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27


list = [ 'abcd', 786 , 2.23, 'runoob', 70.2 ]  # 定义一个列表
list2 = [123, 'runoob']

list.append('Baidu') # 增加元素
list.insert(1, "xx") # 将对象插入列表
list.extend(list2)  # 扩展列表
list + list2 # 打印两个列表拼接在一起的结果

list.index(786) # 从列表中找出某个值第一个匹配项的索引位置
list.count(786)  # 元素出现的次数

list.remove("abcd") # 移除列表中某个值的第一个匹配项
list.pop(0) # 移除列表中的一个元素并返回元素值 (默认最后一个元素)

list.reverse() # 反向列表中元素
list.sort() # 进行排序
list.clear() # 清空列表
list.copy() # 复制列表
list(Tuplex) # 将元祖转换为list列表
list("hello") # 将字符串转换为list列表 ['h', 'e', 'l', 'l', 'o']

print (list)            # 打印整个列表
print (list[0])         # 打印列表的第一个元素
print (list[1:3])       # 打印列表第二到第四个元素（不包含第四个元素）
print (list[2:])        # 打印列表从第三个元素开始到末尾
print (list2 * 2)    # 打印list2列表两次
max(list) #  返回列表元素最大值/min最小值

Tuple元组（不可修改）

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


tuple = ( 'abcd', 786 , 2.23, 'runoob', 70.2  )
tinytuple = (123, 'runoob')

print (tuple)             # 输出完整元组
print (tuple[0])          # 输出元组的第一个元素
print (tuple[1:3])        # 输出从第二个元素开始到第三个元素
print (tuple[2:])         # 输出从第三个元素开始的所有元素
print (tinytuple * 2)     # 输出两次元组
print (tuple + tinytuple) # 合并元组
del tuple # 删除整个元祖
len(tuple) # 计算元祖元素个数
tuple(listx) # 将list列表转换为元祖
max(tuple) # 返回元组中元素最大值。/min最小值

Set集合（可修改）

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37


sites = {'Google', 'Taobao', 'Runoob', 'Facebook', 'Zhihu', 'Baidu'}
print(sites)   # 输出集合，重复的元素被自动去掉

# 成员测试
if 'Runoob' in sites :
    print('Runoob 在集合中')
else :
    print('Runoob 不在集合中')

# set可以进行集合运算
a = set('abracadabra')
b = set('alacazam')

print(a)
print(a - b)     # a 和 b 的差集
print(a | b)     # a 和 b 的并集
print(a & b)     # a 和 b 的交集
print(a ^ b)     # a 和 b 中不同时存在的元素
set('eleven') & set('twelve') # 将字符串转换为集合（计算交集）

sites.add("orange")  # 添加元素
sites.clear() # 移除集合中的所有元素
sites.copy() 
x.difference(y) # 集合的差集
x.difference_update(y) # 移除两个集合都包含的元素
sites.intersection() # 集合的交集
x.intersection_update(y)  # 移除 x 集合中不存在于 y 集合中的元素
x.isdisjoint(y) # 判断两个集合是否没有相同元素（没有相同元素返回 True）
x.issubset(y) # 判断指定集合是否为该方法参数集合的子集。
x.issuperset(y) # 判断该方法的参数集合是否为指定集合的子集 
sites.remove("Taobao") # 移除指定元素
sites.discard("Taobao") # 移除指定元素
x.symmetric_difference(y) # 移除两个集合的重复元素和set1 ^ set2效果一致
x.symmetric_difference_update(y) # 在原始集合 x 中移除与 y 集合中的重复元素，并将不重复的元素插入到集合 x 中
x.union(y) # 合并两个集合，重复元素只会出现一次
x.update(y) # 合并两个集合，重复元素只会出现一次
len(text) # 多少元素

Dictionary字典（可修改）

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


dict = {}
dict['one'] = "1 - 菜鸟教程"
dict[2]     = "2 - 菜鸟工具"

tinydict = {'name': 'runoob','code':1, 'site': 'www.runoob.com'}


print (dict['one'])       # 输出键为 'one' 的值
print (dict[2])           # 输出键为 2 的值
print (tinydict)          # 输出完整的字典
print (tinydict.keys())   # 输出所有键
print (tinydict.values()) # 输出所有值

dict(a='a', b='b', t='t')  # 创建一个字典 {'a': 'a', 'b': 'b', 't': 't'}
dict(zip(['one', 'two', 'three'], [1, 2, 3]))   # 映射函数方式来构造字典
dict([('one', 1), ('two', 2), ('three', 3)])    # 可迭代对象方式来构造字典
dict(zip(list1,list2)) # 两个list列表转换为字典

len(dict) # 计算字典元素个数，即键的总数。
str(dict) #输出字典，可以打印的字符串表示。
dict.clear() # 清空字典
dict.copy() # 复制字典
dict.fromkeys(seq) # 返回一个新字典 {'age': None, 'name': None, 'sex': None}
dict.fromkeys(seq, 10) # 返回一个新字典  {'age': 10, 'name': 10, 'sex': 10}
dict.get(key, default=None) # 返回指定键的值，如果键不在字典中返回 default 设置的默认值
tinydict.items() # 返回key value列表 dict_items([('name', 'runoob'), ('code', 1), ('site', 'www.runoob.com')])
tinydict.keys() # 返回key列表 dict_keys(['name', 'code', 'site'])
tinydict.values() # 返回value列表
tinydict.update(tinydict2) # 追加字典
tinydict.pop('name') # 删除对应的元素
tinydict.popitem() # 删除最后一个元素

Bytes类型

1
2
3


x = b"hello"
if x[0] == ord("h"):
    print("The first element is 'h'")

转换

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


int('11',2)  # 进制转换
int(-11.123) # 取整
float(-123.6) # 转换成浮点型
str("123") # 转换成字符串
repr("123") # 转换成字符串(包括转义字符也被输出)
eval("__import__('platform').uname()") # 执行表达式
tuple(listx) # 将list列表转换为元祖
list(Tuplex) # 将序列转换为list列表 [123, 'Google', 'Runoob', 'Taobao']
list("hello") # 将字符串转换为list列表 ['h', 'e', 'l', 'l', 'o']
set('eleven') & set('twelve') # 将字符串转换为集合（计算交集）
dict(zip(list1,list2)) # 两个list列表转换为字典
frozenset(list1) # 转换为不可变集合
chr(0x30), chr(97), chr(0x61)   # 将整数转换为字符（十六进制/十进制）
ord("a") # 将字符转换为整数
hex(12) # 将一个整数转换为一个十六进制字符串
oct(10) # 整数转换成八进制字符串

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


if (n := 10) > 5: #Python 3.8 及更高版本中引用海象运算符，在表达式中同时进行赋值和返回赋值的值
if ( a and b ): # 与
if ( a or b ): # 或
if not( a and b ): # 非
if ( a == b ):
if ( a != b ):
if ( a <= b ):
if "9" in "12345":
if "9" not in "12345":
if key in dict: # 如果键在字典dict里返回true，否则返回false
if elif 

Match…case

1
2
3
4
5
6
7
8
9


match subject:
    case <pattern_1>:
        <action_1>
    case <pattern_2>:
        <action_2>
    case <pattern_3>:
        <action_3>
    case _: # 类似于 Java 中的 default:
        <action_wildcard>

for

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17


d = {'Name': 'Runoob', 'Age': 7}
for i,j in d.items():
    print(i, ":\t", j)
    
'''输出
Name :	 Runoob
Age :	 7
'''

for item in iterable:
    # 循环主体
else:
    # 循环结束后执行的代码
    
for i in range(5,9) : #5-8
for i in range(5): #0-4
for i in range(0, 10, 3) :# 0-10步长3

while

1
2
3
4


while <expr>:
    <statement(s)>
else:
    <additional_statement(s)>

pass 不做任何事情，一般用做占位语句

字符串内建函数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


"xxx".center(20, '*') # 字符串居中
"abccd".count("c",0,3) # 0到3（不包括3）出现c的次数
"xxx".decode('UTF-8', 'strict') # 解码
'01234'.endswith('234', 2, 5) # 判断字符串是否以指定后缀结尾
'01234'.find('2') # 查找子串所在索引，不包含返回-1；index 用法相同，但找不到时抛出 ValueError
"abc123".isalnum() # 判断所有字符都是数字或者字母
"abc123".isalpha() # 判断所有字符都是字母
"abc123".isdigit() # 判断所有字符都是数字
"abc123".islower() # 判断所有字符都是小写
"ABC123".isupper() # 判断所有字符都是大写
"ABC123".istitle() # 判断所有单词都是首字母大写，像标题
" ".isspace() # 判断所有字符都是空白字符、\t、\n、\r
"123".isnumeric() # 只要有非数字就是false
" ".isspace() # 只有空格就是true
"adfAA".upper() # 把所有字符中的小写字母转换成大写字母
"adfAA".lower() # 把所有字符中的大写字母转换成小写字母
"xxx".capitalize() # 把第一个字母转化为大写字母，其余小写
"-".join(listx) # 分隔list列表，返回字符串
"-".join(tuplex) #分隔tuple元祖，返回字符串
"www.baidu.com".split(".") # 分隔字符，返回list列表
"ab c\n\nde fg\rkl\r\n".splitlines() # 换行符分隔('\r', '\r\n', \n') 返回list
"123".len() # 返回字符串长度


"adfAA".swapcase() # 大写转小写，小写转大写
'www.example.com'.lstrip('wx.') # 从左到右移除字符串的指定字符
'www.example.com'.rstrip("rip") # 删除字符串末尾制定字符
'www.example.com'.strip("w") # 删除前后指定字符 
"www.old.com".replace("old", "new") # 字符串替换
"this is string".startswith('this') # 字符串是否以 this 开头
"aa22".isdecimal() # 检查字符串是否只包含十进制字符
'12'.zfill(5) # 数字的左边填充0 00012

中级

列表推导式格式为：

1

number1 = [number for number in range(30) if number%3==0] # 计算30以内能被3整除的数

字典推导式

1
2


listdemo = ['Google','Runoob', 'Taobao'] # 使用字符串及其长度创建字典：
{ld:len(ld) for ld in listdemo} 

集合推导式

1

{x for x in 'abracadabra' if x not in 'abc'} # 判断不是 abc 的字母并输出：

迭代器

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


numbers = [1, 2, 3, 4]  # not named `list`, which would shadow the built-in type
it = iter(numbers)      # create an iterator object
for x in it:            # an iterator can be consumed by a regular for loop
    print(x, end=" ")

    

import sys  # 引入 sys 模块
numbers = [1, 2, 3, 4]  # not named `list`, which would shadow the built-in type
it = iter(numbers)  # create an iterator object

while True:
    try:
        print(next(it))  # next() pulls one item at a time
    except StopIteration:  # raised once the iterator is exhausted
        break  # leave the loop only; sys.exit() would terminate the whole interpreter

生成器

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17


def countdown(n):
    """Yield n, n - 1, n - 2, ... for as long as the value is greater than zero.

    Yields nothing when n is not greater than zero.
    """
    remaining = n
    while True:
        if not remaining > 0:
            return
        yield remaining
        remaining -= 1


# 创建生成器对象
generator = countdown(5)

# 通过迭代生成器获取值
print(next(generator))  # 输出: 5
print(next(generator))  # 输出: 4
print(next(generator))  # 输出: 3

# 使用 for 循环迭代生成器
for value in generator:
    print(value)  # 输出: 2 1

函数

1
2
3
4
5
6
7
8
9


def max(a, b):
    """Return the larger of a and b; b is returned when a is not greater than b.

    NOTE(review): this name shadows the built-in max() wherever it is defined.
    """
    return a if a > b else b
 
a = 4
b = 5
print(max(a, b))

lambda（匿名函数）

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


map() 函数可以将一个函数应用于列表中的每个元素，并返回一个迭代器（可用 list() 转换为列表），其中包含每个元素被该函数处理后的结果。

示例：

my_list = [1, 2, 3, 4]
new_list = map(lambda x: x**2, my_list)
print(list(new_list))
# output: [1, 4, 9, 16]
filter() 函数可以将一个函数应用于列表中的每个元素，并返回一个迭代器（可用 list() 转换为列表），其中包含使该函数返回 True 的所有元素。

示例：

my_list = [1, 2, 3, 4, 5]
new_list = filter(lambda x: x > 3, my_list)
print(list(new_list))
# output: [4, 5]
reduce() 函数将一个函数应用于列表中的每个元素，同时将前一个函数调用的结果作为参数传递给下一个函数调用，最终返回最终结果。

示例：

from functools import reduce
my_list = [1, 2, 3, 4]
result = reduce(lambda x, y: x*y, my_list)
print(result)
# output: 24

装饰器

带参数的装饰器

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


def repeat(n):
    """Build a decorator that calls the wrapped function n times per invocation.

    Args:
        n: Number of times to call the wrapped function.

    Returns:
        A decorator. The decorated function returns the result of the last
        call, or None when n is zero or negative (the function is never called).
    """
    import functools  # local import keeps the snippet self-contained

    def decorator(func):
        @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
        def wrapper(*args, **kwargs):
            result = None  # defined up front so n <= 0 does not raise UnboundLocalError
            for _ in range(n):
                result = func(*args, **kwargs)
            return result
        return wrapper
    return decorator

@repeat(3)
def greet(name):
    print(f"Hello, {name}!")

greet("Alice")

类装饰器

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


class DecoratorClass:
    """Class-based decorator: an instance wraps a function and is callable in its place."""

    def __init__(self, func):
        """Store the function being decorated.

        Args:
            func: The callable to wrap.
        """
        import functools  # local import keeps the snippet self-contained

        self.func = func
        # Copy __name__, __doc__, etc. from func so the decorated object
        # still identifies itself as the original function.
        functools.update_wrapper(self, func)

    def __call__(self, *args, **kwargs):
        """Call the wrapped function with the given arguments and return its result."""
        # code to run before calling the original function goes here
        result = self.func(*args, **kwargs)
        # code to run after calling the original function goes here
        return result

@DecoratorClass
def my_function():
    pass

竖着看

模式	r	r+	w	w+	a	a+
读	+	+		+		+
写		+	+	+	+	+
创建			+	+	+	+
覆盖			+	+
指针在开始	+	+	+	+
指针在结尾					+	+

写

1
2
3
4


# The with-statement closes the file even if write() raises.
with open("test.txt", "w") as f:
    f.write("test")
# writelines(sequence)向文件写入一个序列字符串列表，如果需要换行则要自己加入每行的换行符。

读

1
2
3
4
5
6


# The with-statement closes the file even if read() raises.
with open("test.txt", "r") as f:
    content = f.read()  # named `content`, not `str`, so the built-in str() stays usable
print(content)
# readline([size])读取整行，包括 "\n" 字符
# readlines([sizeint])读取所有行并返回列表，若给定sizeint>0，返回总和大约为sizeint字节的行, 实际读取值可能比 sizeint 较大, 因为需要填充缓冲区。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


os.listdir("./") # 列出目录下的文件
os.access("test.txt", os.F_OK) # path是否 存在F/读R/写W/执行X
os.chdir( "/" ) # 修改当前工作目录
os.chmod(path, mode) # 更改权限 
os.chown(path, uid, gid) # 更改文件所有者
os.chroot(path) # 变更当前进程的根目录
os.rename("old_file.txt", "new_file.txt") # 重命名文件/目录
os.replace('google.txt','runoob.txt') # 重命名文件/目录
os.mkdir("folder") # 新建目录
os.rmdir("folder") # 删除目录
os.remove("file.txt") # 删除文件
os.getcwd() # 返回当前工作目录
shutil.move(original_file, target_folder) # 移动文件
shutil.copy(original_file, target_folder) # 复制文件
glob.glob('*.py') # 文件通配符
os.system("ls -l")
os.popen("ls -l").read()
os.spawnv(os.P_NOWAIT, "/bin/ls", ["ls", "-l"])
subprocess.call(["ls", "-l"])
subprocess.run(["ls", "-l"], stdout=subprocess.PIPE).stdout.decode()
subprocess.Popen(["ls", "-l"], stdout=subprocess.PIPE).stdout.read().decode()
exec("os.system('ls -l')")
eval("os.system('ls -l')")
runpy.run_path("getAtUsername.py")

高级

类

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


class Clas:
    """Minimal class example: class attributes, a constructor and one method."""

    age = 44  # public class attribute
    __name = "ty"  # double-underscore attribute: name-mangled to _Clas__name outside the class

    def __init__(self, x, y):
        """Store x and y on the instance."""
        self.x, self.y = x, y

    def display_value(self):
        """Return the constant 33."""
        value = 33
        return value

c = Clas(11,22)
print(c.x)
print(c.display_value())
print(c.age)

继承

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


class people:
    """Base class holding a name, an age and a name-mangled weight."""

    # class-level defaults, overridden per instance in __init__
    name = ""
    age = 0
    __weight = 0  # double underscore: name-mangled to _people__weight

    def __init__(self, n, a, w):
        """Store name n, age a and weight w on the instance."""
        self.name, self.age, self.__weight = n, a, w

    def speak(self):
        """Return a sentence containing the name and the age."""
        template = "%s 说: 我 %d 岁。"
        return template % (self.name, self.age)

class student(people):
    """Subclass of people that adds a grade and overrides speak()."""

    grade = 0  # class-level default, overridden per instance in __init__

    def __init__(self, n, a, w, g):
        """Initialise the inherited fields with n, a, w, then store grade g."""
        # super() finds the parent class without hard-coding its name.
        super().__init__(n, a, w)
        self.grade = g

    def speak(self):
        """Override the parent's speak() to also mention the grade."""
        return "%s 说: 我 %d 岁了，我在读 %d 年级" % (self.name, self.age, self.grade)

s = student('ken',10,60,3)
print(s.speak())

私有方法

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


#!/usr/bin/python3

class Site:
    """A site with a public name and a name-mangled (private) URL."""

    def __init__(self, name, url):
        """Store the public name and the private url."""
        # name is public; __url is private and stored as _Site__url
        self.name, self.__url = name, url

    def foo(self):
        """Public method: print a message, then delegate to the private __foo()."""
        print('这是公共方法')
        self.__foo()

    def __foo(self):
        """Private method: name-mangled to _Site__foo, so it is not reachable as .__foo outside the class."""
        print('这是私有方法')

    def who(self):
        """Print the name and the url, one per line."""
        for label, value in (('name  : ', self.name), ('url : ', self.__url)):
            print(label, value)


x = Site('菜鸟教程', 'www.runoob.com')
x.who()  # 正常输出
x.foo()  # 正常输出
x.__foo()  # 报错

标准库

os 模块：os 模块提供了许多与操作系统交互的函数，例如创建、移动和删除文件和目录，以及访问环境变量等。
sys 模块：sys 模块提供了与 Python 解释器和系统相关的功能，例如解释器的版本和路径，以及与 stdin、stdout 和 stderr 相关的信息。
time 模块：time 模块提供了处理时间的函数，例如获取当前时间、格式化日期和时间、计时等。
datetime 模块：datetime 模块提供了更高级的日期和时间处理函数，例如处理时区、计算时间差、计算日期差等。
random 模块：random 模块提供了生成随机数的函数，例如生成随机整数、浮点数、序列等。
math 模块：math 模块提供了数学函数，例如三角函数、对数函数、指数函数、常数等。
re 模块：re 模块提供了正则表达式处理函数，可以用于文本搜索、替换、分割等。
json 模块：json 模块提供了 JSON 编码和解码函数，可以将 Python 对象转换为 JSON 格式，并从 JSON 格式中解析出 Python 对象。
urllib 模块：urllib 模块提供了访问网页和处理 URL 的功能，包括下载文件、发送 POST 请求、处理 cookies 等。

1
2
3
4
5
6


sys.argv[0] # 脚本名；sys.argv[1:] 才是命令行参数，例如 python demo.py one two three
re.findall(r'\bf[a-z]*', 'which foot or hand fell fastest') # 正则表达式
request 访问互联网
datetime.datetime.now() # 时间和日期 
datetime.date.today() # 获取当前日期
current_datetime.strftime("%Y-%m-%d %H:%M:%S") # 格式化日期

+ 代表前面的字符必须出现（1次、多次）

* 代表前面的字符可以出现（0次、1次、多次）

? 代表前面的字符最多出现（0次、1次）

. 除换行符以外的所有字符

^ 字符串开头

$ 字符串结尾

\d 匹配数字。等价于 [0-9]

\D 非数字。等价于 [^0-9]

\s 匹配任意空白字符（包括空格、制表符、换行等）。等价于 [ \f\n\r\t\v]

\S 非空白字符。等价于 [^ \f\n\r\t\v]

\w 匹配数字、字母、下划线。等价于[A-Za-z0-9_]

\W 非数字、字母、下划线。等价于 [^A-Za-z0-9_]

[] 匹配方括号中所包含的任意一个字符

[abc] 匹配中括号内的任意一个字母

[a-z] 匹配 a-z 中的一个字母

[0-9] 匹配任何数字。类似于 [0123456789]

[a-zA-Z0-9] 匹配任何字母及数字

[^abc] 匹配除了a、b、c 字母以外的所有字符

aa|bb 匹配aa或bb

{n} 前面的字符出现（n次）

{n,} 前面的字符出现（n次、n以上）

{m,n} 前面的字符出现（最少m次、最多n次）

(?=expr) 正向向前查找 expr

(?!expr) 负向向前模式 expr

(?<=expr) 正向向后查找 expr

(?<!expr) 负向向后模式 expr

1
2


re.match(pattern, string, flags=0) # 只匹配字符串的开始，如果字符串开始不符合正则表达式，则匹配失败，函数返回None
re.search(pattern, string, flags=0) # 匹配整个字符串，直到找到一个匹配。

函数参数说明：

参数	描述
pattern	匹配的正则表达式
string	要匹配的字符串。
flags	标志位，用于控制正则表达式的匹配方式，如：是否区分大小写，多行匹配等等。

flags

修饰符	描述
re.I	使匹配对大小写不敏感
re.L	做本地化识别（locale-aware）匹配
re.M	多行匹配，影响 ^ 和 $，使它们匹配字符串的每一行的开头和结尾。
re.S	使 . 匹配包括换行在内的所有字符
re.U	根据Unicode字符集解析字符。这个标志影响 \w, \W, \b, \B.
re.X	忽略空格和注释，可以更清晰地组织复杂的正则表达式。
re.ASCII	使 \w, \W, \b, \B, \d, \D, \s, \S 仅匹配 ASCII 字符。

匹配成功re.search方法返回一个匹配的对象，否则返回None。

我们可以使用group(num) 或 groups() 匹配对象函数来获取匹配表达式。

匹配对象方法	描述
group(num=0)	匹配的整个表达式的字符串，group() 可以一次输入多个组号，在这种情况下它将返回一个包含那些组所对应值的元组。
groups()	返回一个包含所有小组字符串的元组，从 1 到所含的小组号。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21


#!/usr/bin/python3
import re

line = "Cats are smarter than dogs"

# re.match only matches at the beginning of the string, so 'dogs' is not found.
matchObj = re.match(r'dogs', line, re.M | re.I)
if matchObj:
    print("match --> matchObj.group() : ", matchObj.group())
else:
    print("No match!!")

# re.search scans the whole string and returns the first match.
matchObj = re.search(r'dogs', line, re.M | re.I)
if matchObj:
    print("search --> searchObj.group() : ", matchObj.group())
else:
    print("No match!!")

"""
No match!!
search --> searchObj.group() :  dogs
"""

re.sub用于替换字符串中的匹配项

1

re.sub(pattern, repl, string, count=0, flags=0)

pattern : 正则中的模式字符串。
repl : 替换的字符串，也可为一个函数。
string : 要被查找替换的原始字符串。
count : 模式匹配后替换的最大次数，默认 0 表示替换所有的匹配。
flags : 编译时用的匹配模式，数字形式。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27


#!/usr/bin/python3
import re
 
phone = "2004-959-559 # 这是一个电话号码"
 
# 删除注释
num = re.sub(r'#.*$', "", phone)
print ("电话号码 : ", num)
 
# 移除非数字的内容
num = re.sub(r'\D', "", phone)
print ("电话号码 : ", num)

"""
电话号码 :  2004-959-559 
电话号码 :  2004959559
"""
# 将匹配的数字乘以 2
def bb(matched):
    """Return the digits captured by the named group 'aa', doubled, as a string.

    Args:
        matched: A match object whose pattern defines a named group 'aa'
            capturing decimal digits.

    Returns:
        str: The decimal representation of twice the captured integer.
    """
    value = int(matched.group('aa'))
    return str(value * 2)

# Raw string: '\d' in a normal string literal is an invalid escape sequence
# and triggers a warning on current Python versions.
print(re.sub(r'(?P<aa>\d+)', bb, 'A23G4HFD567'))

"""
A46G8HFD1134
"""

compile 函数用于编译正则表达式，生成一个正则表达式（ Pattern ）对象，供 match() 和 search() 这两个函数使用。

1

re.compile(pattern[, flags])

pattern : 一个字符串形式的正则表达式

1
2
3
4
5
6


pattern = re.compile(r'([a-z]+) ([a-z]+)', re.I)
m = pattern.match('Hello World Wide Web')
print(m.group(0))   # 返回匹配成功的整个子串
"""
Hello World
"""

findall 在字符串中找到正则表达式所匹配的所有子串，并返回一个列表，如果有多个匹配模式，则返回元组列表，如果没有找到匹配的，则返回空列表。

注意： match 和 search 是匹配一次 findall 匹配所有。

1
2
3


re.findall(pattern, string, flags=0)
或
pattern.findall(string[, pos[, endpos]])

pattern 匹配模式。
string 待匹配的字符串。
pos 可选参数，指定字符串的起始位置，默认为 0。
endpos 可选参数，指定字符串的结束位置，默认为字符串的长度。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


import re
 
result1 = re.findall(r'\d+','runoob 123 google 456')
 
pattern = re.compile(r'\d+')   # 查找数字
result2 = pattern.findall('runoob 123 google 456')
result3 = pattern.findall('run88oob123google456', 0, 10)
 
print(result1)
print(result2)
print(result3)
"""
['123', '456']
['123', '456']
['88', '12']
"""

# 多个匹配模式，返回元组列表：
import re

result = re.findall(r'(\w+)=(\d+)', 'set width=20 and height=10')
print(result)
"""
[('width', '20'), ('height', '10')]
"""

finditer 和 findall 类似，在字符串中找到正则表达式所匹配的所有子串，并把它们作为一个迭代器返回。

1

re.finditer(pattern, string, flags=0)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


import re
 
it = re.finditer(r"\d+","12a32bc43jf3") 
for match in it: 
    print (match.group() )
"""
12 
32 
43 
3
"""

split 方法按照能够匹配的子串将字符串分割后返回列表

1

re.split(pattern, string[, maxsplit=0, flags=0])

maxsplit 分割次数，maxsplit=1 分割一次，默认为 0，不限制次数。

1
2
3
4


re.split(r'\W+', 'runoob, runoob, runoob.')  # raw string avoids the invalid '\W' escape; if the pattern matches nothing, split() leaves the string unsplit
"""
['runoob', 'runoob', 'runoob', '']
"""