feat: 新增poi模块处理excel及word

master
old-tom 2 years ago
parent 5ba41f5690
commit 8c4add0a02

@ -0,0 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/2 9:07
# @Author : old tom
# @File : __init__.py
# @Project : futool
# @Desc : excel、word操作相关

@ -0,0 +1,213 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/2 9:08
# @Author : old tom
# @File : fu_excel.py
# @Project : futool
# @Desc : 读写excel,需要引入openPyxl
from futool.core import fu_file
from openpyxl import load_workbook, Workbook
class ExcelNotFoundError(Exception):
    """Raised when the Excel file to open does not exist."""

    def __init__(self, msg=''):
        # Forward the (optional) message to the base Exception unchanged.
        super().__init__(msg)
class SheetNotExistError(Exception):
    """Raised when a requested sheet is not present in the workbook."""

    def __init__(self, msg=''):
        # Forward the (optional) message to the base Exception unchanged.
        super().__init__(msg)
class ExcelReader(object):
    """
    Read cell values from an Excel workbook opened in openpyxl read-only mode.

    A sheet must be selected with one of the ``load_sheet*`` methods before
    any ``read_*`` method is called. All row and column indexes are 0-based.
    The reader can be used as a context manager so that the workbook is
    closed when the ``with`` block exits.

    TODO: add streaming reads; the sheet slicing approach still needs rework.
    """

    class SheetNotLoadError(Exception):
        """Raised when a read is attempted before a sheet has been loaded."""

        def __init__(self):
            Exception.__init__(self, 'sheet not load,you need call load_sheet()')

    class OutOfColIndexError(Exception):
        """Raised when a requested column index is beyond the header width."""

        def __init__(self, msg):
            Exception.__init__(self, msg)

    def __init__(self, file_path):
        """
        Open the workbook at ``file_path`` in read-only mode.

        :param file_path: path of the Excel file to read
        :raises ExcelNotFoundError: if ``fu_file.exist`` reports the path as missing
        """
        if not fu_file.exist(file_path):
            raise ExcelNotFoundError(msg=f'path={file_path}')
        self.wb = load_workbook(file_path, read_only=True)
        self.sheetnames = self.wb.sheetnames
        # Currently selected worksheet; None until a load_sheet* call succeeds.
        self.sheet = None
        # Width of the first row; refreshed by _col_nums().
        self.col_nums = 0

    def __enter__(self):
        """Return the reader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the workbook on exit; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the underlying workbook (read-only workbooks hold the file open)."""
        self.wb.close()

    def load_sheet(self, sheet_name: str):
        """
        Select a sheet by name and return it.

        :param sheet_name: name of the sheet to load
        :raises SheetNotExistError: if the workbook has no sheet with that name
        :return: the selected worksheet
        """
        if sheet_name not in self.sheetnames:
            raise SheetNotExistError(msg=f'{sheet_name} not exists')
        self.sheet = self.wb[sheet_name]
        return self.sheet

    def load_sheet_by_index(self, sheet_index: int):
        """
        Select a sheet by its position in ``sheetnames`` (0-based) and return it.
        """
        return self.load_sheet(self.sheetnames[sheet_index])

    def load_sheet_first(self):
        """
        Select and return the first sheet of the workbook.
        """
        return self.load_sheet_by_index(0)

    def read_row(self, row_index):
        """
        Read a single row.

        :param row_index: 0-based row number
        :return: list of cell values, or an empty list if the row does not exist
        """
        self._check_sheet()
        for i, row in enumerate(self.sheet):
            if i == row_index:
                # Stop scanning as soon as the requested row has been found.
                return [cell.value for cell in row]
        return []

    def read_range_rows(self, start, end):
        """
        Read the rows whose index lies in ``[start, end]`` (both inclusive).

        :return: list of tuples, one tuple of cell values per row
        """
        self._check_sheet()
        elements = []
        for i, row in enumerate(self.sheet):
            if i > end:
                # Nothing past ``end`` can match, so stop reading the sheet.
                break
            if i >= start:
                elements.append(self._row_values(row))
        return elements

    def read_rows(self, row_index: list = None):
        """
        Read several specific rows.

        :param row_index: row numbers to read, e.g. [1, 3, 5]; ``None`` or an
            empty collection yields an empty result
        :return: list of tuples in sheet order, one per matching row
        """
        self._check_sheet()
        wanted = set(row_index) if row_index is not None else set()
        if not wanted:
            return []
        last = max(wanted)
        elements = []
        for i, row in enumerate(self.sheet):
            if i > last:
                break
            if i in wanted:
                elements.append(self._row_values(row))
        return elements

    def read_first(self):
        """
        Read the header row (row 0) as a list of cell values.
        """
        return self.read_row(0)

    def read_all(self, skip_head=True):
        """
        Read every row of the sheet.

        :param skip_head: when true the first row is omitted; when false it
            is included as the first element of the result
        :return: list of tuples, one tuple of cell values per row
        """
        self._check_sheet()
        first = 1 if skip_head else 0
        return [
            self._row_values(row)
            for i, row in enumerate(self.sheet)
            if i >= first
        ]

    def read_column(self, col_index, skip_head=True):
        """
        Read one column.

        :param col_index: 0-based column index
        :param skip_head: skip the first row when true, include it when false
        :return: list of values; rows too short to have this column are skipped
        """
        self._check_sheet()
        first = 1 if skip_head else 0
        element = []
        for i, row in enumerate(self.sheet):
            if i < first:
                continue
            for j, cell in enumerate(row):
                if j == col_index:
                    element.append(cell.value)
                    break
        return element

    def read_range_column(self, start, end, skip_head=True):
        """
        Read the columns whose index lies in ``[start, end]`` (0-based, inclusive).

        :param start: first column index
        :param end: last column index
        :param skip_head: skip the first row when true, include it when false
        :raises OutOfColIndexError: if ``end`` is not smaller than the width
            of the first row
        :return: list of tuples, one tuple of values per column
        """
        self._col_nums()
        if end >= self.col_nums:
            raise self.OutOfColIndexError(msg=f'out of column index,max col was {self.col_nums} ')
        wanted = range(start, end + 1)
        columns = [[] for _ in wanted]
        first = 1 if skip_head else 0
        # Collect all requested columns in one pass over the sheet instead of
        # re-reading the whole sheet once per column.
        for i, row in enumerate(self.sheet):
            if i < first:
                continue
            cells = tuple(row)
            for slot, col in enumerate(wanted):
                # Rows shorter than ``col`` contribute nothing to that column.
                if 0 <= col < len(cells):
                    columns[slot].append(cells[col].value)
        return [tuple(column) for column in columns]

    @staticmethod
    def _row_values(row):
        """Return the values of all cells in ``row`` as a tuple."""
        return tuple(cell.value for cell in row)

    def _check_sheet(self):
        """Raise SheetNotLoadError unless a sheet has been loaded."""
        if self.sheet is None:
            raise self.SheetNotLoadError()

    def _col_nums(self):
        """Refresh ``col_nums`` from the number of cells in the first row."""
        self.col_nums = len(self.read_first())
class SimpleExcelWriter(object):
    """
    Write a header row and data rows to a new Excel file through an
    openpyxl write-only workbook.
    """

    class ExcelFileExistsError(Exception):
        """Raised when the target Excel file already exists."""

        def __init__(self, msg=''):
            super().__init__(msg)

    def __init__(self, write_path):
        """
        Prepare a write-only workbook destined for ``write_path``.

        :param write_path: path the workbook is saved to by ``write``
        :raises ExcelFileExistsError: if ``fu_file.exist`` reports the path as present
        """
        already_there = fu_file.exist(write_path)
        if already_there:
            raise self.ExcelFileExistsError(f'{write_path} all ready exists')
        self.wb = Workbook(write_only=True)
        self.write_path = write_path

    def write(self, head: [], data: [], sheet_name='Sheet1', index=0):
        """
        Create a sheet, fill it and save the workbook to ``write_path``.

        :param head: values of the first (title) row
        :param data: iterable of rows appended after the title row
        :param sheet_name: title of the sheet to create
        :param index: position of the sheet inside the workbook
        :return: None
        """
        worksheet = self.wb.create_sheet(title=sheet_name, index=index)
        # The title row goes first, followed by each data row in order.
        worksheet.append(head)
        for record in data:
            worksheet.append(record)
        self.wb.save(self.write_path)

@ -0,0 +1 @@
openpyxl~=3.1.2

@ -0,0 +1,84 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/2 9:01
# @Author : old tom
# @File : test_fu_date.py
# @Project : futool
from unittest import TestCase
from futool.core import fu_date
# @Desc :
class Test(TestCase):
    """Tests for ``futool.core.fu_date``; most are placeholders that fail."""

    def test_current_year(self):
        """``current_year`` must return a truthy value."""
        self.assertTrue(fu_date.current_year())

    def test_current_month(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_current_day(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_current_date(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_current_datetime(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_current_time(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_current_timestamp(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_format_datetime_str(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_format_date_str(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_datetime_2_second(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_sec_2_datatime(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_is_leap(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_begin_of_week(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_end_of_week(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_end_of_month(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_weekday(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_age(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_age_of_now(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_between(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_time_offset(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_is_am(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_is_pm(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_next_week(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

    def test_next_month(self):
        """Placeholder: not implemented yet, fails unconditionally."""
        self.fail()

@ -0,0 +1,67 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/2 9:33
# @Author : old tom
# @File : test_fu_excel.py
# @Project : futool
from unittest import TestCase
from futool.poi.fu_excel import ExcelReader, SimpleExcelWriter
# Module-level reader used by the TestExcelReader tests. It is constructed when
# this module is imported, from a hard-coded local Windows path; ExcelReader
# raises ExcelNotFoundError if that file does not exist.
ex_reader = ExcelReader(file_path=r'D:\test\test3.xlsx')
# @Desc :
class TestExcelReader(TestCase):
    """Smoke tests that print what ExcelReader returns for the sample workbook."""

    def test_read_sheet(self):
        """Load 'Sheet1' by name and print the cell values of its first row."""
        worksheet = ex_reader.load_sheet('Sheet1')
        for row_no, cells in enumerate(worksheet):
            if row_no != 0:
                continue
            for cell in cells:
                print(cell.value)

    def test_read_sheet_by_index(self):
        """Load the first sheet by index and print every cell value."""
        worksheet = ex_reader.load_sheet_by_index(0)
        for cells in worksheet:
            for cell in cells:
                print(cell.value)

    def test_read_row(self):
        """Print the first row of 'Sheet1'."""
        ex_reader.load_sheet('Sheet1')
        first_row = ex_reader.read_row(0)
        print(first_row)

    def test_read_range_row(self):
        """Print rows 1 through 3 of 'Sheet1'."""
        ex_reader.load_sheet('Sheet1')
        selected = ex_reader.read_range_rows(1, 3)
        print(selected)

    def test_read_column(self):
        """Print column 4 of 'Sheet1'."""
        ex_reader.load_sheet('Sheet1')
        column = ex_reader.read_column(4)
        print(column)

    def test_read_rows(self):
        """Print rows 0 and 3 of 'Sheet1'."""
        ex_reader.load_sheet('Sheet1')
        picked = ex_reader.read_rows(row_index=[0, 3])
        print(picked)

    def test_read_all(self):
        """Print the result of read_all() for 'Sheet1'."""
        ex_reader.load_sheet('Sheet1')
        everything = ex_reader.read_all()
        print(everything)

    def test_read_range_column(self):
        """Print columns 1 through 4 of 'Sheet1'."""
        ex_reader.load_sheet('Sheet1')
        columns = ex_reader.read_range_column(1, 4)
        print(columns)
class TestExcelWriter(TestCase):
    """Write test for SimpleExcelWriter using a large generated data set."""

    def test_write(self):
        """
        Write a header plus the generated rows and check the file is created.

        SimpleExcelWriter refuses to write to a path that already exists, so
        the output goes to a fresh temporary directory instead of a fixed
        path; the directory is removed when the test finishes.
        """
        import os
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            target = os.path.join(tmp_dir, 'test3.xlsx')
            ew = SimpleExcelWriter(target)
            ew.write(head=['序号', '姓名', '年龄', '身份证', '住址'], data=self.gen_big_excel())
            self.assertTrue(os.path.isfile(target))

    def gen_big_excel(self):
        """Return 999999 identical sample rows used to exercise a large write."""
        sample_row = ('1', '张三', '1', '101111', '特特特特特')
        return [sample_row] * 999999
Loading…
Cancel
Save