import base64
import re

# Full-width punctuation is spelled with \u escapes so each mark stays visibly
# distinct from its ASCII look-alike: ! ? , ; : ( )
_FULLWIDTH_PUNCTUATION = "\uff01\uff1f\uff0c\uff1b\uff1a\uff08\uff09"

# Default delimiters for split_chinese_text: ASCII and full-width forms.
_DEFAULT_PUNCTUATIONS = frozenset("。!?,;:、“”()《》【】" + _FULLWIDTH_PUNCTUATION)

# One or more consecutive delimiters form a single split point in
# split_sentences.
_SENTENCE_SPLIT_RE = re.compile(
    r"[。,、;:?!“”()《》\uff0c\uff1b\uff1a\uff1f\uff01\uff08\uff09]+"
)


# ############################################################# #
# format table function
# ############################################################# #
def format_table(header, rows):
    """Print ``header`` and ``rows`` as a left-aligned, fixed-width table.

    Every column shares one width: the longest cell (header cells included)
    plus two spaces of padding. A line of dashes separates the header from
    the body.

    Args:
        header: Iterable of column titles; each is converted with ``str``.
        rows: Iterable of rows, each an iterable of cells; each cell is
            converted with ``str``. May be empty.

    Returns:
        None. The table is written to standard output with ``print``.
    """
    header_cells = [str(word) for word in header]
    body = [[str(word) for word in row] for row in rows]

    # Include the header in the width so long titles do not run together,
    # and fall back to 0 so an empty table does not make max() raise.
    widest = max(
        (len(cell) for line in [header_cells, *body] for cell in line),
        default=0,
    )
    col_width = widest + 2

    print("".join(cell.ljust(col_width) for cell in header_cells))
    print("".join("-" * col_width for _ in header_cells))
    for line in body:
        print("".join(cell.ljust(col_width) for cell in line))


# ############################################################# #
# encode and decode bytes and string
# ############################################################# #
def encode_bytes2str(data):
    """Encode bytes as a Base64 text string.

    Args:
        data: Bytes-like object to encode, or ``None``.

    Returns:
        The Base64 representation as ``str``, or ``None`` when ``data`` is
        ``None``.
    """
    if data is None:
        return None
    return base64.b64encode(data).decode('utf-8')


def decode_str2bytes(data):
    """Decode a Base64 text string back into bytes.

    Args:
        data: Base64 text as ``str``, or ``None``.

    Returns:
        The decoded ``bytes``, or ``None`` when ``data`` is ``None``.
    """
    if data is None:
        return None
    return base64.b64decode(data.encode('utf-8'))


def split_sentences(text: str):
    """Split ``text`` on punctuation and drop the punctuation itself.

    Delimiters are ``。 、 “ ” 《 》`` plus ``, ; : ? ! ( )`` in both ASCII and
    full-width form. A run of consecutive delimiters counts as one split
    point, and empty fragments are discarded.

    Args:
        text: The string to split.

    Returns:
        List of non-empty fragments in their original order.

    Example:
        split_sentences("今天天气真好,我们去公园玩吧!")
        returns ['今天天气真好', '我们去公园玩吧'].
    """
    return [piece for piece in _SENTENCE_SPLIT_RE.split(text) if piece]


def split_chinese_text(text: str, return_patch=False, punctuations=None):
    """Split ``text`` after each punctuation mark, keeping the marks.

    Args:
        text: The string to split.
        return_patch: When true, return the "patch" form described below
            instead of the full sentence list.
        punctuations: Iterable of single-character delimiters. ``None``
            selects the default set: ``。 、 “ ” 《 》 【 】`` plus
            ``! ? , ; : ( )`` in both ASCII and full-width form.

    Returns:
        With ``return_patch`` false: ``(sentences, punctuation_list)``.
        Each sentence ends with its delimiter, except a trailing fragment
        that has none. ``punctuation_list`` holds the delimiters in order of
        appearance.

        With ``return_patch`` true: ``(parts, found)``.
          * no delimiter in ``text``: ``([text], False)``
          * ``text`` ends with a delimiter: ``([text], True)``
          * otherwise: ``([up_to_last_delimiter, remainder], True)``

    Example:
        split_chinese_text("你好,世界!再见")
        returns (['你好,', '世界!', '再见'], [',', '!']).
    """
    # Honour a caller-supplied delimiter set rather than overwriting it.
    if punctuations is None:
        delimiters = _DEFAULT_PUNCTUATIONS
    else:
        delimiters = set(punctuations)

    sentences = []
    punctuation_list = []
    start = 0  # index where the current sentence begins
    for i, char in enumerate(text):
        if char in delimiters:
            sentences.append(text[start:i + 1])
            punctuation_list.append(char)
            start = i + 1

    # Trailing fragment that is not terminated by a delimiter.
    has_tail = start < len(text)
    if has_tail:
        sentences.append(text[start:])

    if return_patch:
        if not punctuation_list:
            return [text], False
        if not has_tail:
            return [text], True
        # text[:start] is exactly the concatenation of the terminated
        # sentences; text[start:] is the unterminated remainder.
        return [text[:start], text[start:]], True

    return sentences, punctuation_list


def remove_brackets_and_contents(text):
    """Remove bracketed spans, brackets included, from ``text``.

    Three bracket styles are stripped in sequence: ASCII parentheses
    ``( )``, full-width parentheses (U+FF08 / U+FF09) and lenticular
    brackets ``【 】``. Matching is non-greedy and does not cross newlines.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every matched span removed.
    """
    result = re.sub(r'\(.*?\)', '', text)
    # Full-width parentheses need their own pass; \( and \) only match ASCII.
    result = re.sub(r'\uff08.*?\uff09', '', result)
    result = re.sub(r'【.*?】', '', result)
    return result