fix: tagged_speakers splits on single newlines, not just double newlines
Each line starting with [Tag] now begins a new segment, so users don't need blank lines between tagged speeches. Continuation lines (lines with no tag) are joined to the previous tagged segment, which keeps multi-line speeches together.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+28
-7
@@ -186,25 +186,46 @@ class OmniVoiceGenerate:
|
|||||||
spk_mode = speakers_data["mode"]
|
spk_mode = speakers_data["mode"]
|
||||||
label_map = {s["label"].lower(): i for i, s in enumerate(speaker_list)}
|
label_map = {s["label"].lower(): i for i, s in enumerate(speaker_list)}
|
||||||
|
|
||||||
paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
|
|
||||||
if not paragraphs:
|
|
||||||
raise ValueError("OmniVoice Multi-Speaker: no paragraphs found in text.")
|
|
||||||
|
|
||||||
if spk_mode == "alternate_paragraphs":
|
if spk_mode == "alternate_paragraphs":
|
||||||
|
paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
|
||||||
|
if not paragraphs:
|
||||||
|
raise ValueError("OmniVoice Multi-Speaker: no paragraphs found in text.")
|
||||||
segments = [
|
segments = [
|
||||||
(para, speaker_list[i % len(speaker_list)])
|
(para, speaker_list[i % len(speaker_list)])
|
||||||
for i, para in enumerate(paragraphs)
|
for i, para in enumerate(paragraphs)
|
||||||
]
|
]
|
||||||
else: # tagged_speakers
|
else: # tagged_speakers
|
||||||
|
# In tagged mode each line that starts with [Tag] begins a new segment.
|
||||||
|
# Continuation lines (no tag) are appended to the previous segment so
|
||||||
|
# multi-line speeches stay together. Both \n and \n\n separators work.
|
||||||
|
raw_segments: list[list[str]] = []
|
||||||
|
current: list[str] = []
|
||||||
|
for line in text.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
if _TAG_RE.match(line):
|
||||||
|
if current:
|
||||||
|
raw_segments.append(current)
|
||||||
|
current = [line]
|
||||||
|
else:
|
||||||
|
current.append(line)
|
||||||
|
if current:
|
||||||
|
raw_segments.append(current)
|
||||||
|
|
||||||
|
if not raw_segments:
|
||||||
|
raise ValueError("OmniVoice Multi-Speaker: no tagged segments found in text.")
|
||||||
|
|
||||||
segments = []
|
segments = []
|
||||||
for para in paragraphs:
|
for lines in raw_segments:
|
||||||
m = _TAG_RE.match(para)
|
joined = " ".join(lines)
|
||||||
|
m = _TAG_RE.match(joined)
|
||||||
if m:
|
if m:
|
||||||
tag = m.group(1).strip().lower()
|
tag = m.group(1).strip().lower()
|
||||||
body = m.group(2).strip()
|
body = m.group(2).strip()
|
||||||
spk = speaker_list[label_map.get(tag, 0)]
|
spk = speaker_list[label_map.get(tag, 0)]
|
||||||
else:
|
else:
|
||||||
body = para
|
body = joined
|
||||||
spk = speaker_list[0]
|
spk = speaker_list[0]
|
||||||
if body:
|
if body:
|
||||||
segments.append((body, spk))
|
segments.append((body, spk))
|
||||||
|
|||||||
Reference in New Issue
Block a user