# -*- coding: utf-8 -*-
from gtts.tokenizer import RegexBuilder, symbols


def tone_marks():
    """Tone-mark case: match the character that follows a tone-modifying mark.

    Each pattern is a lookbehind on the mark followed by ``.``, so the mark
    itself is outside the match and is kept.

    Relies on the `tone_marks` pre-processor having been run, which covers
    input where a tone-modifying punctuation mark might not be followed by
    a space.
    """

    def _after_mark(mark):
        # Zero-width lookbehind on the mark, then exactly one character.
        return u"(?<={}).".format(mark)

    builder = RegexBuilder(
        pattern_args=symbols.TONE_MARKS,
        pattern_func=_after_mark,
    )
    return builder.regex


# NOTE(review): the definition below is cut off in the middle of its regex
# literal in this copy of the file. Its visible text is preserved verbatim
# (commented out) so nothing is lost; restore the missing remainder from the
# upstream module before re-enabling it.
#
# def period_comma():
#     """Period and comma case.
#
#     Match if not preceded by "." and only if followed by space.
#     Won't cut in the middle/after dotted abbreviations; won't cut numbers.
#
#     Note: Won't match if a dotted abbreviation ends a sentence.
#
#     Note: Won't match the end of a sentence if not followed by a space.
#     """
#     return RegexBuilder(
#         pattern_args=symbols.PERIOD_COMMA,
#         pattern_func=lambda x: r"(?