Python Regex Cheat Sheet

New

Python re module: patterns, flags, match/search/findall, groups, and substitution

Pattern Syntax

Character Classes

Match specific sets of characters

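.       # any char except newline
\d      # digit ([0-9]; also other Unicode decimal digits unless re.ASCII is set)
\D      # non-digit
\w      # word char ([a-zA-Z0-9_]; also other Unicode letters/digits unless re.ASCII is set)
\W      # non-word char
\s      # whitespace
[abc]   # a, b, or c
[^abc]  # not a, b, or c
[a-z]   # lowercase letter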

Quantifiers

Control how many times a pattern repeats

*      # 0 or more (greedy)
+      # 1 or more (greedy)
?      # 0 or 1 (optional)
{3}    # exactly 3 times
{2,5}  # 2 to 5 times
*?     # 0 or more (non-greedy)
+?     # 1 or more (non-greedy)

Anchors

Match positions in a string, not characters

^   # start of string (or of each line with re.M)
$   # end of string, or just before a trailing newline (or end of each line with re.M)
\b  # word boundary
\B  # non-word boundary
\A  # absolute start of string
\Z  # absolute end of string

Groups

Capture and reference parts of a match

(abc)           # capturing group
(?:abc)         # non-capturing group
(?P<name>abc)   # named group
(a|b|c)         # alternation (a or b or c)
(?=abc)         # positive lookahead
(?!abc)         # negative lookahead

Flags

re.IGNORECASE (re.I)

Make pattern matching case-insensitive

re.search(r"python", text, re.IGNORECASE) # matches "Python", "PYTHON", "python"

re.MULTILINE (re.M)

Make ^ and $ match start/end of each line

re.findall(r"^\w+", text, re.MULTILINE) # matches first word on each line

re.DOTALL (re.S)

Make . match newline characters too

re.search(r"start.*end", text, re.DOTALL) # matches across multiple lines

re.VERBOSE (re.X)

Allow whitespace and comments in patterns

# Each piece of the pattern sits on its own line: under re.VERBOSE a "#"
# comment runs to the end of the line, so writing the whole pattern on a
# single line would comment out everything after \d{4}.
pattern = re.compile(
    r"""
    \d{4}   # year
    -       # separator
    \d{2}   # month
    """,
    re.VERBOSE,
)

Functions

re.match vs re.search

match checks only at start; search checks anywhere

import re

# re.match only tries the pattern at the very start of the string.
re.match(r"\d+", "abc123")   # None (not at start)
# re.search scans forward and returns the first match found anywhere.
re.search(r"\d+", "abc123")  # match object for "123"

re.findall & re.finditer

Find all occurrences as a list or iterator

re.findall(r"\d+", "a1 b22 c333")  # ["1", "22", "333"]

for m in re.finditer(r"\d+", text):
    print(m.group(), m.start(), m.end())

re.sub

Replace matches with a string or function

re.sub(r"\s+", " ", text)                               # collapse whitespace
re.sub(r"(\w+)", r"[\1]", text)                         # wrap words in brackets
re.sub(r"\d+", lambda m: str(int(m.group()) * 2), text) # double every number

re.split

Split string at each match

# A character class followed by "+" treats any run of separators as one delimiter.
re.split(r"[,;\s]+", "a, b; c d")  # ["a", "b", "c", "d"]
# A capturing group in the pattern keeps the delimiters in the result list.
re.split(r"(,)", "a,b,c")          # ["a", ",", "b", ",", "c"]

re.compile

Compile a pattern for repeated use

# Compile once, then call methods on the pattern object as often as needed.
pattern = re.compile(r"\b\w{5}\b", re.IGNORECASE)
pattern.findall("Hello world there")  # ["Hello", "world", "there"]

Groups & Backreferences

Named Groups

Access match groups by name for clarity

m = re.search(r"(?P<year>\d{4})-(?P<month>\d{2})", "2024-01")
m.group("year")   # "2024"
m.group("month")  # "01"
m.groupdict()     # {"year": "2024", "month": "01"}

Backreferences

Refer back to a captured group in the same pattern

re.search(r"\b(\w+) \1\b", "hello hello")  # matches repeated word
re.sub(r"\b(\w+) \1\b", r"\1", text)       # remove duplicates (\b keeps "this is" from becoming "this")

Common Patterns

Email Validation

Match a common email address format

import re

# Simplified pattern for a common address shape: a local part, "@", a domain
# made of letters/digits/dots/hyphens, and an alphabetic suffix of 2+ letters.
# \Z is used instead of $ because $ also matches just before a trailing
# newline, which would let "user@example.com\n" through.
EMAIL_RE = re.compile(
    r"^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\Z"
)


def is_valid_email(email: str) -> bool:
    """Return True if *email* matches the simplified address pattern.

    The entire string must match; leading or trailing extras (including a
    trailing newline) make the address invalid.

    Args:
        email: Candidate address to check.

    Returns:
        True when the whole string matches EMAIL_RE, False otherwise.
    """
    return EMAIL_RE.fullmatch(email) is not None


is_valid_email("user@example.com")    # True
is_valid_email("not-an-email")        # False
is_valid_email("user@example.com\n")  # False (trailing newline is rejected)

URL Extraction

Find all URLs in a block of text

import re

# Matches "http://" or "https://" (any letter case) followed by a run of
# characters that are neither whitespace nor one of < > " { } | \ ^ [ ].
URL_RE = re.compile(r'https?://[^\s<>"{}|\\^\[\]]+', flags=re.IGNORECASE)

text = "Visit https://example.com or http://docs.python.org"
# The pattern has no capturing groups, so each match's full text is the URL.
urls = [found.group(0) for found in URL_RE.finditer(text)]
# ["https://example.com", "http://docs.python.org"]

Log Parsing

Extract structured data from log lines

import re
from datetime import datetime

# One named group per field: a YYYY-MM-DD date, an HH:MM:SS time, a single
# word for the level, and the remainder of the line as the message.
LOG_RE = re.compile(
    r"(?P<date>\d{4}-\d{2}-\d{2}) "
    r"(?P<time>\d{2}:\d{2}:\d{2}) "
    r"(?P<level>\w+) "
    r"(?P<message>.+)"
)

line = "2024-01-15 14:30:00 ERROR Database connection failed"
m = LOG_RE.match(line)
if m is not None:
    # Subscripting a match object by group name is the same as m.group(name).
    print(m["level"])    # ERROR
    print(m["message"])  # Database connection failed

Tips & Best Practices

Always use raw strings (r"\d+") for regex patterns to avoid double-escaping backslashes

Use re.compile() when applying the same pattern multiple times for better performance

Use named groups ((?P<name>...)) in complex patterns to make matches self-documenting

Test patterns interactively at regex101.com with Python flavor selected

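Prefer non-greedy quantifiers (*?, +?) when matching delimited spans such as a single HTML tag, so the match stops at the first closing delimiter; for truly nested structures use a real parser (e.g. html.parser), since non-greedy matching does not make a regex handle nesting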