使用Python创建和导出Jupyter Notebook的完整教程

2026-03-26 13:07:02发布 0次浏览

详情描述

1. 环境准备

首先确保安装了必要的库：

pip install notebook nbformat nbconvert

2. 使用nbformat创建Notebook

基本示例：创建简单的Notebook

import nbformat as nbf
from nbformat import v4 as nb

# Source of the markdown cell that opens the notebook.
text = """# 我的第一个Python Notebook

这是一个使用nbformat创建的notebook。

## 主要内容
- 数据分析
- 可视化
- 机器学习
"""

# Source of the first code cell: imports plus a greeting.
code1 = """# 导入必要的库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

print("Hello, Jupyter Notebook!")"""

# Source of the second code cell: builds a sine curve and plots it.
code2 = """# 创建一些数据
x = np.linspace(0, 10, 100)
y = np.sin(x)

# 绘制图形
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'b-', linewidth=2)
plt.title('正弦函数')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.grid(True)
plt.show()"""

# Start from an empty v4 notebook and attach the three cells in order.
nb = nbf.v4.new_notebook()
nb['cells'].extend([
    nbf.v4.new_markdown_cell(text),
    nbf.v4.new_code_cell(code1),
    nbf.v4.new_code_cell(code2),
])

# Write the notebook to a UTF-8 encoded .ipynb file.
with open('my_notebook.ipynb', 'w', encoding='utf-8') as f:
    nbf.write(nb, f)

print("Notebook已创建: my_notebook.ipynb")

高级示例：包含多种单元格类型

import nbformat as nbf
from nbformat import v4 as nb
import json
import datetime

def create_advanced_notebook():
    """Build an in-memory notebook mixing markdown and code cells.

    The notebook gets Python 3 kernel metadata and six cells, in order:
    a title (stamped with the current local time), a table of contents,
    three code cells (data preparation, analysis, visualization) and a
    conclusion. Nothing is written to disk by this function.

    Returns:
        The notebook object produced by ``nbf.v4.new_notebook()`` with the
        metadata and cells attached.
    """
    report = nbf.v4.new_notebook()

    # Kernel and language metadata stored in the notebook file.
    report.metadata = {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "name": "python",
            "version": "3.8.0",
            "mimetype": "text/x-python",
            "codemirror_mode": {"name": "ipython", "version": 3},
            "pygments_lexer": "ipython3"
        }
    }

    # --- markdown sources -------------------------------------------------
    created_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    title_md = f"""# 数据分析报告
**创建时间**: {created_at}
**作者**: Python Notebook Creator

## 项目概述
本项目演示了如何使用Python创建复杂的Jupyter Notebook。"""

    toc_md = """## 目录
1. [数据准备](#数据准备)
2. [数据分析](#数据分析)
3. [可视化](#可视化)
4. [结论](#结论)"""

    conclusion_md = """## 结论

### 主要发现
1. 销售额呈上升趋势
2. 访问量与销售额呈正相关
3. 转化率相对稳定

### 建议
1. 继续增加访问量
2. 优化转化路径
3. 监控异常数据"""

    # --- code sources -----------------------------------------------------
    # The doubled backslashes keep a literal "\n" escape in the generated
    # cell source instead of inserting a real line break here.
    prep_src = """# 数据准备
import numpy as np
import pandas as pd

# 创建示例数据
np.random.seed(42)
data = {
    '日期': pd.date_range('2023-01-01', periods=100),
    '销售额': np.random.normal(1000, 200, 100).cumsum(),
    '访问量': np.random.randint(500, 2000, 100),
    '转化率': np.random.uniform(0.01, 0.05, 100)
}

df = pd.DataFrame(data)
print("数据形状:", df.shape)
print("\\n前5行数据:")
print(df.head())"""

    analysis_src = """# 数据分析
print("数据统计描述:")
print(df.describe())

print("\\n数据类型:")
print(df.dtypes)

# 计算相关系数
correlation = df[['销售额', '访问量', '转化率']].corr()
print("\\n相关系数矩阵:")
print(correlation)"""

    plot_src = """# 数据可视化
import matplotlib.pyplot as plt
import seaborn as sns

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# 销售额趋势
axes[0, 0].plot(df['日期'], df['销售额'], 'g-', linewidth=2)
axes[0, 0].set_title('销售额趋势')
axes[0, 0].set_xlabel('日期')
axes[0, 0].set_ylabel('销售额')
axes[0, 0].grid(True)

# 访问量分布
axes[0, 1].hist(df['访问量'], bins=20, alpha=0.7, color='blue')
axes[0, 1].set_title('访问量分布')
axes[0, 1].set_xlabel('访问量')
axes[0, 1].set_ylabel('频次')

# 散点图
axes[1, 0].scatter(df['访问量'], df['销售额'], alpha=0.6, c='red')
axes[1, 0].set_title('访问量 vs 销售额')
axes[1, 0].set_xlabel('访问量')
axes[1, 0].set_ylabel('销售额')

# 箱线图
df_box = df[['销售额', '访问量']]
df_box.columns = ['Sales', 'Visits']
axes[1, 1].boxplot(df_box.values, labels=df_box.columns)
axes[1, 1].set_title('数据分布箱线图')

plt.tight_layout()
plt.show()"""

    # Assemble the cells in reading order.
    report['cells'] = [
        nbf.v4.new_markdown_cell(title_md),
        nbf.v4.new_markdown_cell(toc_md),
        nbf.v4.new_code_cell(prep_src),
        nbf.v4.new_code_cell(analysis_src),
        nbf.v4.new_code_cell(plot_src),
        nbf.v4.new_markdown_cell(conclusion_md),
    ]
    return report

# Build the advanced notebook, then write it to the current working
# directory as a UTF-8 encoded .ipynb file.
advanced_nb = create_advanced_notebook()
with open('advanced_notebook.ipynb', mode='w', encoding='utf-8') as f:
    nbf.write(advanced_nb, f)
print("高级Notebook已创建: advanced_notebook.ipynb")

3. 导出Notebook为其他格式

3.1 导出为HTML

import nbformat
from nbconvert import HTMLExporter
import os

def export_to_html(ipynb_file, output_file=None):
    """Convert a notebook file to HTML with nbconvert's ``HTMLExporter``.

    Args:
        ipynb_file: Path of the ``.ipynb`` file to read.
        output_file: Destination path. When omitted, the notebook path with
            its extension replaced by ``.html`` is used.

    Returns:
        The path the HTML was written to.
    """
    if output_file is None:
        stem, _ext = os.path.splitext(ipynb_file)
        output_file = stem + '.html'

    # Load the notebook as a version-4 node.
    with open(ipynb_file, 'r', encoding='utf-8') as src:
        nb_node = nbformat.read(src, as_version=4)

    exporter = HTMLExporter()
    exporter.template_name = 'classic'  # options: 'classic', 'lab'

    # Only the rendered body is kept; the resources dict is not used.
    rendered, _resources = exporter.from_notebook_node(nb_node)

    with open(output_file, 'w', encoding='utf-8') as dst:
        dst.write(rendered)

    print(f"已导出为HTML: {output_file}")
    return output_file

# Usage example: convert the basic notebook to an explicitly named HTML file
export_to_html('my_notebook.ipynb', 'my_notebook.html')

3.2 导出为PDF

def export_to_pdf(ipynb_file, output_file=None):
    """Convert a notebook to PDF by running ``jupyter nbconvert --to pdf``.

    The conversion runs in a subprocess. ``--to pdf`` goes through LaTeX,
    so the machine needs pandoc and a TeX distribution providing XeLaTeX
    in addition to nbconvert itself.

    Args:
        ipynb_file: Path of the ``.ipynb`` file to convert.
        output_file: Value passed to ``--output``. When omitted, the
            notebook path with its extension replaced by ``.pdf`` is used.

    Returns:
        ``output_file`` when nbconvert exits with status 0, otherwise
        ``None`` (a failure message is printed instead of raising).
    """
    import subprocess

    if output_file is None:
        output_file = os.path.splitext(ipynb_file)[0] + '.pdf'

    cmd = [
        'jupyter', 'nbconvert',
        '--to', 'pdf',
        '--TemplateExporter.exclude_input=False',
        ipynb_file,
        '--output', output_file,
    ]

    # Keep the try body to the one call that can fail to launch.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except (OSError, ValueError) as e:
        # OSError: the `jupyter` executable is missing or not runnable.
        # ValueError: invalid arguments or undecodable process output.
        print(f"PDF导出错误: {e}")
        print("请确保已安装 jupyter 和 nbconvert；'--to pdf' 还需要 pandoc 与 XeLaTeX (TeX 发行版)")
        return None

    if result.returncode != 0:
        print(f"PDF导出失败: {result.stderr}")
        return None

    print(f"已导出为PDF: {output_file}")
    return output_file

# Usage example: convert the basic notebook, letting the output name default
export_to_pdf('my_notebook.ipynb')

3.3 导出为Markdown

def export_to_markdown(ipynb_file, output_file=None):
    """Convert a notebook file to Markdown with nbconvert.

    Args:
        ipynb_file: Path of the ``.ipynb`` file to read.
        output_file: Destination path. When omitted, the notebook path with
            its extension replaced by ``.md`` is used.

    Returns:
        The path the Markdown text was written to.
    """
    target = output_file
    if target is None:
        target = os.path.splitext(ipynb_file)[0] + '.md'

    from nbconvert import MarkdownExporter

    # Load the notebook as a version-4 node.
    with open(ipynb_file, 'r', encoding='utf-8') as src:
        nb_node = nbformat.read(src, as_version=4)

    # Index 0 of the returned pair is the Markdown text; the resources
    # dict at index 1 is not used.
    conversion = MarkdownExporter().from_notebook_node(nb_node)
    markdown_text = conversion[0]

    with open(target, 'w', encoding='utf-8') as dst:
        dst.write(markdown_text)

    print(f"已导出为Markdown: {target}")
    return target

# Usage example: convert the basic notebook, letting the output name default
export_to_markdown('my_notebook.ipynb')

3.4 导出为Python脚本

def export_to_python(ipynb_file, output_file=None):
    """Convert a notebook file to a Python script with nbconvert.

    Args:
        ipynb_file: Path of the ``.ipynb`` file to read.
        output_file: Destination path. When omitted, the notebook path with
            its extension replaced by ``.py`` is used.

    Returns:
        The path the script was written to.
    """
    if output_file is None:
        base_path, _ = os.path.splitext(ipynb_file)
        output_file = f"{base_path}.py"

    from nbconvert import PythonExporter

    # Load the notebook as a version-4 node.
    with open(ipynb_file, 'r', encoding='utf-8') as notebook_fh:
        nb_node = nbformat.read(notebook_fh, as_version=4)

    script_exporter = PythonExporter()
    script_text, _unused_resources = script_exporter.from_notebook_node(nb_node)

    with open(output_file, 'w', encoding='utf-8') as script_fh:
        script_fh.write(script_text)

    print(f"已导出为Python脚本: {output_file}")
    return output_file

# Usage example: convert the basic notebook, letting the output name default
export_to_python('my_notebook.ipynb')

4. 批量处理Notebook

import os
import glob

def batch_convert_notebooks(input_pattern, output_format='html'):
    """Convert every notebook matching a glob pattern to one output format.

    Args:
        input_pattern: Glob pattern, e.g. ``'*.ipynb'`` or
            ``'notebooks/*.ipynb'``.
        output_format: One of ``'html'``, ``'pdf'``, ``'markdown'`` or
            ``'python'``.

    Returns:
        ``None``. A message is printed when nothing matches or the format
        is unknown; a failure on one file is printed and the loop moves on
        to the next file.
    """
    matched = glob.glob(input_pattern)
    if len(matched) == 0:
        print(f"未找到匹配的文件: {input_pattern}")
        return

    print(f"找到 {len(matched)} 个notebook文件")

    # Map each supported format name to the function that produces it.
    converters = {
        'html': export_to_html,
        'pdf': export_to_pdf,
        'markdown': export_to_markdown,
        'python': export_to_python
    }
    convert = converters.get(output_format)
    if convert is None:
        print(f"不支持的输出格式: {output_format}")
        return

    for path in matched:
        try:
            print(f"正在处理: {path}")
            convert(path)
        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print(f"处理失败 {path}: {e}")

# 使用示例
# batch_convert_notebooks('*.ipynb', 'html')
# batch_convert_notebooks('notebooks/*.ipynb', 'markdown')

5. 读取和修改现有Notebook

def read_and_modify_notebook(ipynb_file, output_file=None):
    """Append one markdown cell and one code cell to an existing notebook.

    Args:
        ipynb_file: Path of the ``.ipynb`` file to read.
        output_file: Destination path. When omitted, the result is written
            beside the input with ``modified_`` prefixed to the file name.

    Returns:
        The path the modified notebook was written to.
    """
    import os

    if output_file is None:
        # Prefix only the file name: prepending to the whole path would
        # turn 'notebooks/a.ipynb' into 'modified_notebooks/a.ipynb'.
        folder, filename = os.path.split(ipynb_file)
        output_file = os.path.join(folder, 'modified_' + filename)

    # Load the notebook as a version-4 node.
    with open(ipynb_file, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    # Append a markdown cell.
    new_md_cell = nbf.v4.new_markdown_cell("""## 新增内容
这个单元格是通过程序添加的。""")
    notebook['cells'].append(new_md_cell)

    # Append a code cell.
    new_code_cell = nbf.v4.new_code_cell("""# 新增的代码单元格
print("这段代码是后来添加的")""")
    notebook['cells'].append(new_code_cell)

    # Write the modified notebook.
    with open(output_file, 'w', encoding='utf-8') as f:
        nbf.write(notebook, f)

    print(f"已修改并保存: {output_file}")
    return output_file

# 使用示例
# read_and_modify_notebook('my_notebook.ipynb')

6. 创建模板Notebook

def create_template_notebook(template_type='data_science', output_file='template.ipynb'):
    """Write a starter notebook for a given kind of project.

    Args:
        template_type: ``'data_science'`` or ``'machine_learning'``.
        output_file: Path of the ``.ipynb`` file to create.

    Returns:
        ``output_file`` after the notebook is written, or ``None`` (with a
        printed message) when ``template_type`` is not a known template.
    """

    templates = {
        'data_science': {
            'title': '数据科学项目模板',
            'cells': [
                nbf.v4.new_markdown_cell("""# 数据科学项目模板

## 项目简介
[在这里描述项目目的和背景]

## 目标
1. [目标1]
2. [目标2]
3. [目标3]"""),

                nbf.v4.new_markdown_cell("""## 1. 环境设置"""),

                # The generated cell tries the current style name first:
                # matplotlib 3.6 renamed the bundled 'seaborn' style to
                # 'seaborn-v0_8' and 3.8 removed the old name. The fallback
                # keeps the cell working on older matplotlib releases.
                nbf.v4.new_code_cell("""# 导入必要的库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')
# matplotlib 3.6 起内置的 seaborn 样式改名为 'seaborn-v0_8'（3.8 中旧名被移除）
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette("husl")

print("所有库已导入")"""),

                nbf.v4.new_markdown_cell("""## 2. 数据加载"""),

                nbf.v4.new_code_cell("""# 加载数据
# df = pd.read_csv('data.csv')
# df = pd.read_excel('data.xlsx')
print("请在此处加载您的数据")""")
            ]
        },

        'machine_learning': {
            'title': '机器学习项目模板',
            'cells': [
                nbf.v4.new_markdown_cell("""# 机器学习项目模板

## 机器学习流水线
1. 数据探索
2. 数据预处理
3. 特征工程
4. 模型训练
5. 模型评估
6. 模型部署""")
            ]
        }
    }

    if template_type not in templates:
        print(f"未知的模板类型: {template_type}")
        return None

    # Create the notebook and pick the requested template.
    notebook = nbf.v4.new_notebook()
    template = templates[template_type]

    # Kernel and language metadata stored in the notebook file.
    notebook.metadata = {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "name": "python",
            "version": "3.8.0"
        }
    }

    # Attach the template's cells.
    notebook['cells'] = template['cells']

    # Write the notebook to a UTF-8 encoded file.
    with open(output_file, 'w', encoding='utf-8') as f:
        nbf.write(notebook, f)

    print(f"模板已创建: {output_file}")
    return output_file

# 使用示例
# create_template_notebook('data_science', 'ds_template.ipynb')

7. 完整示例：综合应用

def comprehensive_example():
    """Run the whole workflow: create, export, batch-convert, clean up.

    Steps:
        1. Build the advanced notebook and save it as
           ``advanced_notebook.ipynb``.
        2. Export it to HTML, Markdown and a Python script.
        3. Create three small ``example_N.ipynb`` notebooks and convert
           each to HTML.
        4. Delete the ``example_*.ipynb`` files; the HTML files generated
           from them are left in place.

    All files are created in the current working directory.
    """

    print("=" * 50)
    print("Jupyter Notebook 创建和导出工具")
    print("=" * 50)

    # 1. Create the notebook and persist it: the export steps below read
    # 'advanced_notebook.ipynb' back from disk, so it must exist first.
    print("\n1. 创建基础notebook...")
    advanced_nb = create_advanced_notebook()
    with open('advanced_notebook.ipynb', 'w', encoding='utf-8') as f:
        nbf.write(advanced_nb, f)

    # 2. Export to the individual formats.
    print("\n2. 导出为不同格式...")
    html_file = export_to_html('advanced_notebook.ipynb')
    md_file = export_to_markdown('advanced_notebook.ipynb')
    py_file = export_to_python('advanced_notebook.ipynb')

    print("\n3. 批量处理示例...")
    # Create a few throwaway notebooks to convert.
    for i in range(3):
        nb = nbf.v4.new_notebook()
        nb['cells'] = [
            nbf.v4.new_markdown_cell(f"# Notebook {i+1}"),
            nbf.v4.new_code_cell(f"print('这是第{i+1}个notebook')")
        ]
        # Explicit UTF-8: the cell source contains non-ASCII text and the
        # platform default encoding may not be able to represent it.
        with open(f'example_{i+1}.ipynb', 'w', encoding='utf-8') as f:
            nbf.write(nb, f)

    # Convert every example notebook to HTML.
    print("批量转换example*.ipynb为HTML...")
    for file in glob.glob('example_*.ipynb'):
        export_to_html(file)

    print("\n4. 清理临时文件...")
    # Remove the throwaway notebooks created above.
    for file in glob.glob('example_*.ipynb'):
        os.remove(file)
        print(f"已删除: {file}")

    print("\n✅ 所有操作完成！")
    print("生成的notebook: advanced_notebook.ipynb")
    print(f"HTML版本: {html_file}")
    print(f"Markdown版本: {md_file}")
    print(f"Python脚本: {py_file}")

# Run the full example only when this file is executed as a script
if __name__ == "__main__":
    comprehensive_example()

8. 使用Papermill执行Notebook

# 首先安装: pip install papermill

def execute_notebook(input_nb, output_nb=None, parameters=None):
    """Execute a notebook with Papermill and save the executed copy.

    Args:
        input_nb: Path of the notebook to execute.
        output_nb: Path for the executed notebook. When omitted, it is
            written beside the input with ``executed_`` prefixed to the
            file name.
        parameters: Dict of parameters passed to the notebook; defaults to
            an empty dict.

    Returns:
        The output path on success. ``None`` when Papermill is not
        installed or execution fails (a message is printed in both cases).
    """
    # Import separately so that only a missing papermill produces the
    # "please install" hint; an ImportError raised while executing the
    # notebook is reported as an execution failure instead.
    try:
        import papermill as pm
    except ImportError:
        print("请先安装papermill: pip install papermill")
        return None

    import os

    try:
        if output_nb is None:
            # Prefix only the file name: prepending to the whole path would
            # turn 'notebooks/a.ipynb' into 'executed_notebooks/a.ipynb'.
            folder, filename = os.path.split(input_nb)
            output_nb = os.path.join(folder, 'executed_' + filename)

        if parameters is None:
            parameters = {}

        pm.execute_notebook(
            input_nb,
            output_nb,
            parameters=parameters,
            kernel_name='python3'
        )
    except Exception as e:
        # Top-level boundary of the tutorial helper: report, don't raise.
        print(f"执行notebook失败: {e}")
        return None

    print(f"Notebook已执行并保存: {output_nb}")
    return output_nb

# 使用示例
# execute_notebook('my_notebook.ipynb', parameters={'data_path': 'data.csv'})

总结

这个教程涵盖了：

- 创建Notebook：使用nbformat创建各种类型的单元格
- 导出功能：转换为HTML、PDF、Markdown、Python脚本
- 批量处理：自动化处理多个notebook文件
- 模板系统：创建可重用的notebook模板
- 修改功能：读取和修改现有notebook
- 执行工具：使用Papermill执行notebook

安装所有依赖

pip install notebook nbformat nbconvert papermill matplotlib pandas numpy seaborn

这个工具集可以帮助您自动化Jupyter Notebook的创建、修改、转换和执行工作流。