Python 日常使用自动化脚本

描述：使用 Tesseract 进行 OCR 识别。描述：根据文件扩展名将目录中的文件组织到子目录中。描述：连接到 SQLite 数据库并执行查询。描述：读取和写入 Excel 文件。描述：在文件中查找并替换特定文本。描述：从 PDF 文件中提取文本。描述：删除指定目录中的空文件夹。描述：监控系统中的可用磁盘空间。描述：批量重命名目录中的文件。描述：计算文本文件中的单词数。描述：删除数据集中的重复行。

风车带走过往

74人浏览 · 2025-01-20 11:22:54

风车带走过往 · 2025-01-20 11:22:54 发布

文件管理自动化

按扩展名排序文件

描述：根据文件扩展名将目录中的文件组织到子目录中。

import os
from shutil import move
def sort_files(directory_path):
    """Move each file in ``directory_path`` into a subdirectory named after its extension.

    Only regular files directly inside ``directory_path`` are considered;
    existing subdirectories are left alone. A file such as ``report.pdf`` ends
    up at ``<directory_path>/pdf/report.pdf``. Files without an extension
    (``README``) and dotfiles (``.bashrc``) are left in place.

    Args:
        directory_path: Directory whose files should be organized.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be moved.
    """
    for filename in os.listdir(directory_path):
        source_path = os.path.join(directory_path, filename)
        if not os.path.isfile(source_path):
            continue
        # splitext (unlike split('.')[-1]) yields an empty extension for names
        # without a dot and for dotfiles. Without this, an extensionless file
        # would be "sorted" into a directory carrying its own name, which
        # collides with the file itself and makes the move fail.
        file_extension = os.path.splitext(filename)[1].lstrip('.')
        if not file_extension:
            continue
        destination_directory = os.path.join(directory_path, file_extension)
        os.makedirs(destination_directory, exist_ok=True)
        move(source_path, os.path.join(destination_directory, filename))
# Usage example ('/path/to/directory' is a placeholder path)
sort_files('/path/to/directory')

删除空文件夹

描述：删除指定目录中的空文件夹。


import os
def remove_empty_folders(directory_path):
    """Delete every empty folder found below ``directory_path``.

    The tree is walked bottom-up, so a folder that only contained empty
    folders is itself empty by the time it is inspected and is removed too.
    ``directory_path`` itself is never removed.
    """
    for parent, subfolders, _ in os.walk(directory_path, topdown=False):
        for name in subfolders:
            candidate = os.path.join(parent, name)
            if os.listdir(candidate):
                # Still has content; keep it.
                continue
            os.rmdir(candidate)
# Usage example ('/path/to/directory' is a placeholder path)
remove_empty_folders('/path/to/directory')

重命名多个文件

描述：批量重命名目录中的文件。

import os
def rename_files(directory_path, old_name, new_name):
    """Rename entries in ``directory_path`` whose name contains ``old_name``.

    Every occurrence of ``old_name`` in a matching entry name is replaced with
    ``new_name``; entries that do not contain ``old_name`` are untouched.
    """
    matching = [entry for entry in os.listdir(directory_path) if old_name in entry]
    for current in matching:
        source = os.path.join(directory_path, current)
        target = os.path.join(directory_path, current.replace(old_name, new_name))
        os.rename(source, target)
# Usage example: replace 'old' with 'new' in the names found in the placeholder directory
rename_files('/path/to/directory', 'old', 'new')

网络抓取

从网站提取数据

描述：从网站上抓取数据。

import requests
from bs4 import BeautifulSoup
def scrape_data(url, timeout=10):
    """Fetch ``url`` and return its HTML parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The parsed document, built with the ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Code that extracts the relevant data from the page goes here.
    return soup
# Usage example: fetch the page and print the text of its <title> element
url = 'https://example.com'
soup = scrape_data(url)
print(soup.title.string)

批量下载图片

描述：从网站批量下载图片。


import requests
def download_images(url, save_directory, timeout=10):
    """Download every image listed by ``url`` into ``save_directory``.

    The response of ``url`` is decoded as JSON and iterated as a sequence of
    image URLs. Each image that answers with HTTP 200 is written to
    ``image_<index>.jpg``; other responses are skipped. Nothing is downloaded
    when the listing request itself does not return HTTP 200.

    Args:
        url: Endpoint returning the list of image URLs.
        save_directory: Target directory; created if it does not exist yet.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
        OSError: If the directory or a file cannot be written.
    """
    # Imported locally so the function does not depend on a file-level import.
    import os

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return
    images = response.json()  # assumes the API returns a JSON array of image URLs
    # Create the target directory up front; open() would otherwise fail on
    # the first image when the directory is missing.
    os.makedirs(save_directory, exist_ok=True)
    for index, image_url in enumerate(images):
        image_response = requests.get(image_url, timeout=timeout)
        if image_response.status_code == 200:
            with open(os.path.join(save_directory, f"image_{index}.jpg"), "wb") as f:
                f.write(image_response.content)
# Usage example (the API URL and save path are placeholders)
download_images('https://api.example.com/images', '/path/to/save')

文件处理和操作

计算文本文件中的单词数

描述：计算文本文件中的单词数。


def count_words(file_path, encoding='utf-8'):
    """Return the number of whitespace-separated words in a text file.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding of the file. Stated explicitly so the result
            does not depend on the platform's default encoding.

    Returns:
        The word count as an int; an empty file yields 0.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        # Count line by line so large files are never held in memory at once;
        # a word cannot span a line break, so the total is unchanged.
        return sum(len(line.split()) for line in f)
# Usage example ('/path/to/file.txt' is a placeholder path)
word_count = count_words('/path/to/file.txt')
print(f"Word count: {word_count}")

查找和替换文本

描述：在文件中查找并替换特定文本。


def find_replace(file_path, search_text, replace_text, encoding='utf-8'):
    """Replace every occurrence of ``search_text`` in a file, in place.

    Args:
        file_path: Path of the text file to modify.
        search_text: Text to look for.
        replace_text: Text that replaces each occurrence.
        encoding: Text encoding used for both reading and writing. Stated
            explicitly so the file is not re-encoded with a platform default.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        text = f.read()
    # Leave the file untouched when there is nothing to replace, instead of
    # truncating and rewriting it with the same content.
    if search_text not in text:
        return
    modified_text = text.replace(search_text, replace_text)
    with open(file_path, 'w', encoding=encoding) as f:
        f.write(modified_text)
# Usage example: replace 'old' with 'new' in the placeholder file
find_replace('/path/to/file.txt', 'old', 'new')

邮件自动化

发送个性化邮件
描述：发送个性化邮件。


import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
def send_personalized_email(sender_email, sender_password, recipients, subject, body,
                            smtp_host='smtp.gmail.com', smtp_port=587):
    """Send one plain-text email per recipient over a STARTTLS SMTP session.

    Each recipient gets a separate message, so recipients do not see each
    other's addresses. The subject and body are the same for everyone.

    Args:
        sender_email: Address used to log in and as the ``From`` header.
        sender_password: Password for ``sender_email``.
        recipients: Iterable of recipient addresses.
        subject: Subject line for every message.
        body: Plain-text body for every message.
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port.

    Raises:
        smtplib.SMTPException: If login or sending fails.
        OSError: If the server cannot be reached.
    """
    # The context manager closes the connection even when login or sending
    # raises; the previous explicit quit() was skipped on any exception.
    with smtplib.SMTP(smtp_host, smtp_port) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        for recipient_email in recipients:
            message = MIMEMultipart()
            message['From'] = sender_email
            message['To'] = recipient_email
            message['Subject'] = subject
            message.attach(MIMEText(body, 'plain'))
            server.send_message(message)
# Usage example (addresses and password are placeholders)
# NOTE(review): avoid hard-coding a real password in source; read it from the
# environment or a secrets store instead.
sender_email = 'your_email@gmail.com'
sender_password = 'your_password'
recipients = ['recipient1@example.com', 'recipient2@example.com']
subject = 'Hello'
body = 'This is a test email.'
send_personalized_email(sender_email, sender_password, recipients, subject, body)

Excel 电子表格自动化

读取和写入 Excel
描述：读取和写入 Excel 文件。


import pandas as pd
def read_excel(file_path):
    """Load the Excel file at ``file_path`` and return it as a DataFrame."""
    return pd.read_excel(file_path)
def write_to_excel(data, file_path):
    """Build a DataFrame from ``data`` and save it to ``file_path`` without the index column."""
    pd.DataFrame(data).to_excel(file_path, index=False)
# Usage example: write a small table to a placeholder path, read it back and print it
data = {'Column1': [1, 2, 3], 'Column2': [4, 5, 6]}
write_to_excel(data, '/path/to/output.xlsx')
df = read_excel('/path/to/output.xlsx')
print(df)

数据清洗和转换

删除数据中的重复数据
描述：删除数据集中的重复行。

import pandas as pd
def remove_duplicates(file_path):
    """Drop duplicate rows from the Excel file at ``file_path`` and overwrite it.

    The file is rewritten in place without the index column.
    """
    deduplicated = pd.read_excel(file_path).drop_duplicates()
    deduplicated.to_excel(file_path, index=False)
# Usage example ('/path/to/data.xlsx' is a placeholder path; the file is overwritten)
remove_duplicates('/path/to/data.xlsx')

图像编辑自动化

调整图像大小
描述：将图像调整为指定的宽度和高度。

from PIL import Image
def resize_image(input_path, output_path, width, height):
    """Resize an image to exactly ``width`` x ``height`` pixels and save it.

    The aspect ratio is not preserved; the image is stretched to the
    requested size using Lanczos resampling.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resized image is written to.
        width: Target width in pixels.
        height: Target height in pixels.
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name. The context manager closes the source
    # file, which the bare Image.open() call left open.
    with Image.open(input_path) as image:
        resized_image = image.resize((width, height), Image.LANCZOS)
        resized_image.save(output_path)
# Usage example: resize the placeholder input image to 800x600
resize_image('/path/to/input.jpg', '/path/to/output.jpg', 800, 600)

系统任务自动化

监控磁盘空间
描述：监控系统中的可用磁盘空间。

import shutil
def check_disk_space(path, threshold):
    """Print a warning when the free space on ``path``'s filesystem is below ``threshold`` GB.

    Free space is rounded down to whole gibibytes (2**30 bytes) before the
    comparison. When the threshold is met, the free space is printed instead.
    """
    usage = shutil.disk_usage(path)
    free_gb = usage.free // (2**30)
    message = (
        f"Warning: Free disk space is below {threshold} GB."
        if free_gb < threshold
        else f"Free disk space: {free_gb} GB."
    )
    print(message)
# Usage example: warn when the filesystem containing '/' has less than 10 GB free
check_disk_space('/', 10)

网络自动化

检查网站状态
描述：检查网站的状态。

import requests
def check_website_status(url, timeout=10):
    """Request ``url`` once and print whether the site is reachable.

    Args:
        url: Address to check.
        timeout: Seconds to wait for the server before treating the site as
            unreachable. Without it the check could block indefinitely.

    Returns:
        True when the server answered with HTTP 200, False otherwise
        (including connection errors and timeouts).
    """
    # Only the network call sits inside the try block, so unrelated errors
    # are not reported as connectivity problems.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error accessing website {url}: {e}")
        return False
    if response.status_code == 200:
        print(f"Website {url} is up and running.")
        return True
    print(f"Website {url} returned status code {response.status_code}.")
    return False
# Usage example
check_website_status('https://example.com')

PDF 操作自动化

从 PDF 中提取文本
描述：从 PDF 文件中提取文本。


import PyPDF2
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text as one string; pages that yield no text contribute
        nothing.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_path, 'rb') as file:
        # PdfFileReader / numPages / getPage / extractText were removed in
        # PyPDF2 3.x; PdfReader, .pages and extract_text() replace them.
        reader = PyPDF2.PdfReader(file)
        # join() avoids building the result by repeated string concatenation.
        return ''.join(page.extract_text() or '' for page in reader.pages)
# Usage example ('/path/to/document.pdf' is a placeholder path)
text = extract_text_from_pdf('/path/to/document.pdf')
print(text)

OCR识别

识别图像中的文本
描述：使用 Tesseract 进行 OCR 识别。


import pytesseract
from PIL import Image
def recognize_text(image_path, lang=None):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        image_path: Path of the image to read.
        lang: Tesseract language code passed through to
            ``pytesseract.image_to_string``. ``None`` keeps Tesseract's
            default language. Pass ``'chi_sim'`` for Simplified Chinese; the
            matching language data must be installed for Tesseract.

    Returns:
        The recognized text as a string.
    """
    # The earlier comment claimed Simplified Chinese was used, but no
    # language was ever passed; it is now an explicit, optional argument.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image, lang=lang)
# Usage example ('/path/to/image.jpg' is a placeholder path)
text = recognize_text('/path/to/image.jpg')
print(text)

数据库交互

连接到数据库
描述：连接到 SQLite 数据库并执行查询。


import sqlite3
def connect_to_database(db_path):
    """Open the SQLite database at ``db_path``.

    Returns:
        A ``(connection, cursor)`` tuple; the cursor belongs to the returned
        connection. The caller is responsible for closing the connection.
    """
    connection = sqlite3.connect(db_path)
    return connection, connection.cursor()
def execute_query(cursor, query, params=()):
    """Execute ``query`` on ``cursor`` and return all resulting rows.

    Args:
        cursor: An open database cursor.
        query: SQL statement, optionally containing ``?`` or ``:name``
            placeholders.
        params: Values bound to the placeholders, as a sequence or a mapping.
            Bind values this way instead of formatting them into ``query``,
            which is how SQL injection happens.

    Returns:
        The list of rows produced by the statement (empty when there are none).
    """
    cursor.execute(query, params)
    return cursor.fetchall()
# Usage example: open the database, run one query, print the rows, then close
# the connection (the database path and 'table_name' are placeholders)
conn, cursor = connect_to_database('/path/to/database.db')
query = 'SELECT * FROM table_name'
results = execute_query(cursor, query)
print(results)
conn.close()

将文件上传到 AWS S3

描述：将文件上传到 AWS S3 存储桶。

import boto3
def upload_to_s3(bucket_name, file_path, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        bucket_name: Name of the destination bucket.
        file_path: Path of the local file to upload.
        object_name: Key to store the file under. Defaults to the base name
            of ``file_path`` when omitted.
    """
    # Imported locally so the function does not depend on a file-level import.
    import os

    if object_name is None:
        object_name = os.path.basename(file_path)
    s3 = boto3.client('s3')
    s3.upload_file(file_path, bucket_name, object_name)
# Usage example (bucket name and file path are placeholders)
bucket_name = 'your-bucket-name'
file_path = '/path/to/file.txt'
object_name = 'file.txt'
upload_to_s3(bucket_name, file_path, object_name)