# Regexes tried in order by extract_final(); the first one that matches wins,
# so the most specific answer markers come first. Group 1 is the answer text.
EXTRACT_PATS = [
    # LaTeX \boxed{...} without nested braces (the backslash is optional so
    # that text whose backslashes were stripped still matches).
    r"\\?boxed\{([^{}]+)\}",
    # "Final answer: ..." up to the end of the line. "solution" is kept for
    # backward compatibility; "ultimate"/"reply" cover the marker wording that
    # appears elsewhere in this file.
    r"(?:final|ultimate)\s+(?:answer|solution|reply)\s*[:=]\s*([^\n]+)",
    # Bare "answer: ..." / "answer = ..." up to the end of the line.
    r"(?:answer|solution|reply)\s*[:=]\s*([^\n]+)",
]


def extract_final(text):
    """Pull the final answer out of a free-form model response.

    Each pattern in EXTRACT_PATS is tried in order (case-insensitively); the
    first match supplies the answer, with surrounding whitespace and any
    leading/trailing ``.``, ``,`` or ``;`` removed. If no pattern matches, the
    last non-blank line of the text is returned instead.

    Args:
        text: The response to search. Non-string values are converted with
            ``str()``; falsy values (``None``, ``""``) yield ``""``.

    Returns:
        The extracted answer string, or ``""`` when there is nothing to extract.
    """
    if not text:
        return ""
    text = str(text)
    for pattern in EXTRACT_PATS:
        match = re.search(pattern, text, flags=re.IGNORECASE)
        if match:
            # The final strip() removes whitespace exposed once trailing
            # punctuation is gone (e.g. "42 ;" -> "42").
            return match.group(1).strip().strip(".,;").strip()
    lines = [ln.strip() for ln in text.strip().splitlines() if ln.strip()]
    return lines[-1] if lines else ""
def latex_to_sympy(s):
    """Rewrite a small subset of LaTeX math as a plain expression string.

    The result is meant to be handed to ``float()`` or a symbolic parser. Only
    string rewriting happens here; nothing is evaluated.

    Handled constructs: surrounding ``$``, ``\\(``/``\\)`` and ``\\[``/``\\]``
    delimiters; ``\\cdot`` and ``\\times`` (-> ``*``); thin-space commands
    ``\\,`` ``\\;`` ``\\!`` (removed); ``\\frac``/``\\dfrac``/``\\tfrac`` and
    ``\\sqrt`` (including nesting); ``\\pi`` (with an explicit ``*`` inserted
    after a digit or closing parenthesis); ``^`` (-> ``**``). Any other
    backslash command is dropped and remaining braces become parentheses.

    Args:
        s: LaTeX-ish text; ``None`` is treated as the empty string and other
            non-string values are converted with ``str()``.

    Returns:
        The rewritten expression string (possibly empty).
    """
    s = "" if s is None else str(s)
    s = s.strip().strip("$").strip()
    # Remove a leading \( or \[ and a trailing \) or \] only; bare parentheses
    # are part of the expression and must survive.
    s = re.sub(r"^\\[\[\(]", "", s)
    s = re.sub(r"\\[\]\)]$", "", s)
    s = s.strip()
    for latex, plain in (
        ("\\cdot", "*"),
        ("\\times", "*"),
        ("\\,", ""),
        ("\\;", ""),
        ("\\!", ""),
    ):
        s = s.replace(latex, plain)
    # Each pass only rewrites brace groups with no nested braces, so repeat
    # until nothing changes to resolve nesting such as \frac{\sqrt{2}}{2}.
    # Every substitution removes one \frac/\sqrt, so the loop terminates.
    previous = None
    while previous != s:
        previous = s
        s = re.sub(r"\\[dt]?frac\s*\{([^{}]+)\}\s*\{([^{}]+)\}", r"((\1)/(\2))", s)
        s = re.sub(r"\\sqrt\s*\{([^{}]+)\}", r"sqrt(\1)", s)
    # "7396\pi" means 7396*pi; make the multiplication explicit so a parser
    # does not see the single token "7396pi".
    s = re.sub(r"(?<=[0-9)])\s*\\pi", "*pi", s)
    s = s.replace("\\pi", "pi")
    s = s.replace("^", "**")
    # Drop any remaining commands (\left, \right, \text, ...). The backslash is
    # required so that plain identifiers such as "pi" and "sqrt" are kept.
    s = re.sub(r"\\[a-zA-Z]+", "", s)
    s = s.replace("{", "(").replace("}", ")")
    return s
def grade(pred, gold, tol=1e-4):
    """Verifiable reward in [0,1]: exact > numeric > sympy-symbolic > partial.

    The final answer is extracted from ``pred`` with ``extract_final`` and
    compared against ``gold`` in four stages, returning at the first success:

    1. Exact: equal after lower-casing, removing all whitespace and stripping
       leading/trailing ``$ . , ; [ ] ( )`` -> 1.0
    2. Numeric: both sides convert to floats whose difference, relative to
       ``max(1, |gold|)``, is below ``tol`` -> 1.0
    3. Symbolic: SymPy simplifies ``pred - gold`` to zero -> 1.0
    4. Partial: the normalised gold string occurs inside the normalised
       prediction -> 0.5

    Args:
        pred: Model output (any object; converted with ``str()``).
        gold: Reference answer (any object; converted with ``str()``).
        tol: Tolerance for the numeric comparison.

    Returns:
        1.0, 0.5 or 0.0. ``None`` for either argument yields 0.0.
    """
    if pred is None or gold is None:
        return 0.0
    p = extract_final(str(pred)).strip()
    g = str(gold).strip()

    def norm(x):
        # Case- and whitespace-insensitive form with outer punctuation removed.
        return re.sub(r"\s+", "", x.lower()).strip("$.,;[]()")

    norm_p, norm_g = norm(p), norm(g)
    if norm_p == norm_g:
        return 1.0

    # NOTE(security): sympify() evaluates its input with eval() internally, so
    # these calls should only see text from trusted or sandboxed sources.
    def to_float(x):
        # Plain float() first; fall back to SymPy for expressions like "-71/4".
        # Parsing failures of any kind deliberately mean "not a number".
        try:
            return float(latex_to_sympy(x))
        except Exception:
            try:
                return float(sp.sympify(latex_to_sympy(x)).evalf())
            except Exception:
                return None

    fp, fg = to_float(p), to_float(g)
    if fp is not None and fg is not None:
        # Relative error for large magnitudes, absolute error below 1.
        if abs(fp - fg) / max(1.0, abs(fg)) < tol:
            return 1.0

    try:
        ep = sp.sympify(latex_to_sympy(p))
        eg = sp.sympify(latex_to_sympy(g))
        if sp.simplify(ep - eg) == 0:
            return 1.0
    except Exception:
        # Best effort: anything SymPy cannot parse or subtract simply falls
        # through to the partial-credit check below.
        pass

    if norm_g and norm_g in norm_p:
        return 0.5
    return 0.0
# Smoke-test the grader: print the reward for a few hand-written
# (prediction, gold, expected reward) triples so mismatches are visible.
# Gold answers are written as LaTeX, hence the escaped backslashes.
print("\n=== Grader sanity checks ===")
for pred, gold, need in [
    ("The answer is \\boxed{120}", "[120]", 1.0),
    ("After computing: 7396 pi", "7396\\pi", 1.0),
    ("Final answer: -71/4", "-\\frac{71}{4}", 1.0),
    ("Therefore the result is 0.0074", "0.0074", 1.0),
    ("Final answer: nucleus accumbens", "Nucleus accumbens", 1.0),
    ("I do not know", "12", 0.0),
]:
    print(f" pred={pred[:38]!r:42s} gold={gold!r:22s} -> r={grade(pred, gold)} (need {need})")
# Instruction header placed at the top of every prompt. It asks the model to
# finish with a single "Final answer: ..." line.
SYSTEM = ("You are a STEM expert solving multimodal reasoning problems. "
          "You will see a question and one or more figures. "
          "Reason step-by-step, then end with exactly one line:\n"
          "Final answer: <your final answer>")


def build_prompt(ex):
    """Build the text prompt for one example.

    The prompt is the SYSTEM header, one ``[Image k]`` placeholder line per
    entry in ``ex["images"]`` (numbered from 1), the question, and a closing
    step-by-step cue, separated by blank lines.

    Args:
        ex: Mapping with an ``"images"`` entry supporting ``len()`` and a
            ``"question"`` entry that is interpolated into the prompt.

    Returns:
        The assembled prompt string.
    """
    img_tags = "\n".join(f"[Image {k}]" for k in range(1, len(ex["images"]) + 1))
    return f"{SYSTEM}\n\n{img_tags}\n\nQuestion:\n{ex['question']}\n\nLet's think step-by-step."
# Show the first 600 characters of the prompt built for the first dataset row.
print("\n=== Example prompt (truncated) ===")
print(build_prompt(ds[0])[:600], "...\n")
Design a Full Multimodal RLVR Pipeline with Open-MM-RL, Vision-Language Prompting, Reward Scoring, and GRPO Export
RELATED ARTICLES
