The Roadmap for Mastering LLMOps in 2026

# llm_with_tracing.py

# Purpose: A production-ready LLM call wrapper with full observability.

# Every call is traced in Langfuse: input, output, tokens, cost, latency.

# Prerequisites:

# pip install langfuse anthropic python-dotenv

# Setup:

# 1. Create a free account at https://cloud.langfuse.com

# 2. Get your keys from Settings > API Keys

# 3. Create a .env file with the variables below

# Run:

# python llm_with_tracing.py

import os

import time

from dotenv import load_dotenv

import anthropic

from langfuse import Langfuse

# Load environment variables from .env file

load_dotenv()

# Required environment variables in your .env:

# LANGFUSE_PUBLIC_KEY=pk-lf-…

# LANGFUSE_SECRET_KEY=sk-lf-…

# LANGFUSE_HOST=https://cloud.langfuse.com (or your self-hosted URL)

# ANTHROPIC_API_KEY=sk-ant-…

# Initialize clients

langfuse_client = Langfuse() # Reads keys automatically from environment

anthropic_client = anthropic.Anthropic() # Reads ANTHROPIC_API_KEY from environment

# ── Configuration ─────────────────────────────────────────────────────────────

# Store your prompt here, not inline in the API call.

# This makes it versionable and testable independently.

SYSTEM_PROMPT = “””You are a helpful customer support assistant.

Answer questions clearly and concisely.

If you do not know something, say so directly — do not guess.”””

MODEL = “claude-sonnet-4-20250514”

# Anthropic’s pricing as of mid-2026 (update when pricing changes)

# Used to calculate cost per call for cost tracking

COST_PER_INPUT_TOKEN = 3.00 / 1_000_000 # $3.00 per million input tokens

COST_PER_OUTPUT_TOKEN = 15.00 / 1_000_000 # $15.00 per million output tokens

def call_llm_with_tracing(

user_message: str,

session_id: str = “default-session”,

user_id: str = “anonymous”

) -> str:

“””

Make a traced LLM call. Every call creates a Langfuse trace with:

– Full input and output

– Token usage (input, output, total)

– Calculated cost in USD

– Latency in milliseconds

– Model used and session context

Parameters:

user_message : The message from the user

session_id : Groups related calls into one conversation in Langfuse

user_id : Associates the call with a specific user for analytics

Returns:

The LLM response as a string

“””

# Create a top-level trace for this user interaction

# The trace appears in the Langfuse dashboard as one unit of work

trace = langfuse_client.trace(

name=”customer-support-call”,

session_id=session_id,

user_id=user_id,

input={“user_message”: user_message, “system_prompt”: SYSTEM_PROMPT}

)

# Create a generation span inside the trace

# This captures model-specific details: model name, tokens, cost

generation = trace.generation(

name=”claude-completion”,

model=MODEL,

input={

“system”: SYSTEM_PROMPT,

“messages”: ({“role”: “user”, “content”: user_message})

}

)

start_time = time.time()

try:

# Make the API call

response = anthropic_client.messages.create(

model=MODEL,

max_tokens=1024,

system=SYSTEM_PROMPT,

messages=({“role”: “user”, “content”: user_message})

)

latency_ms = int((time.time() – start_time) * 1000)

# Extract the response text

response_text = response.content(0).text

# Extract token usage from the response

input_tokens = response.usage.input_tokens

output_tokens = response.usage.output_tokens

total_tokens = input_tokens + output_tokens

# Calculate cost for this call

cost_usd = (

input_tokens * COST_PER_INPUT_TOKEN +

output_tokens * COST_PER_OUTPUT_TOKEN

)

# Update the generation span with results

# This data populates the Langfuse cost and token dashboards

generation.end(

output=response_text,

usage={

“input”: input_tokens,

“output”: output_tokens,

“total”: total_tokens,

“unit”: “TOKENS”

metadata={

“latency_ms”: latency_ms,

“cost_usd”: round(cost_usd, 6),

“model”: MODEL

}

)

# Update the trace with the final output

trace.update(

output={“response”: response_text},

metadata={“total_cost_usd”: round(cost_usd, 6)}

)

# Print a summary to stdout for local visibility

print(f”\n{‘─’ * 60}”)

print(f”User: {user_message}”)

print(f”Claude: {response_text}”)

print(f”Tokens: {input_tokens} in / {output_tokens} out / {total_tokens} total”)

print(f”Cost: ${cost_usd:.6f}”)

print(f”Latency: {latency_ms}ms”)

print(f”Trace: {langfuse_client.base_url}/trace/{trace.id}”)

print(f”{‘─’ * 60}\n”)

return response_text

except Exception as e:

# Record the error in the trace so it shows up in Langfuse

generation.end(

output=None,

metadata={“error”: str(e), “latency_ms”: int((time.time() – start_time) * 1000)}

)

trace.update(output={“error”: str(e)})

# Always flush before raising — ensures the error trace is sent

langfuse_client.flush()

raise

finally:

# Flush sends all buffered events to Langfuse

# In a long-running service, Langfuse flushes automatically.

# In a script, you must flush manually before the process exits.

langfuse_client.flush()

# ── Run a demonstration ────────────────────────────────────────────────────────

if __name__ == “__main__”:

# Simulate two turns of a customer support conversation

test_messages = (

“What is your return policy for electronics?”,

“Can I return an item I bought 45 days ago?”

)

session = “demo-session-001”

for i, message in enumerate(test_messages):

print(f”\nCall {i + 1}/{len(test_messages)}”)

try:

call_llm_with_tracing(

user_message=message,

session_id=session,

user_id=”test-user-42″

)

except Exception as e:

print(f”Error on call {i + 1}: {e}”)

Source link

The Roadmap for Mastering LLMOps in 2026

Why robotics groups want digital gyms earlier than deployment

Prescription Drug Costs Fell Whereas Hospital Prices Rose—What the Might CPI Means for Seniors

Beatbot AquaSense X Evaluate: A Pool Robotic That Cleans Itself

Bringing Ode Poetry to life with MAI’s audio fashions

Chainlink CCIP Comes To Arbitrum Orbit As Layer-3 Builders Chase Safer Messaging

I modified 5 settings and my Moto Buds 2 Plus immediately sounded higher

Bringing Ode Poetry to life with MAI’s audio fashions

Responsibly constructing the AI future

Aurora 1.5: Extending open basis fashions for climate and Earth-system functions

Selecting the Proper AI Agent Reminiscence Technique: A Determination-Tree Strategy

Transferring gross sales and repair organizations ahead with agentic CX and Microsoft 365 Copilot

Making humanitarian safety seen in our on-line world: The promise of the Digital Emblem

Leave a ReplyCancel reply

São Paulo Nightlife Information for Wednesday, June 17, 2026

Has Technique’s New Framework Defused STRC ‘Demise Spiral’ Fears?

Ought to You Purchase the Honeywell Aerospace Spinoff?

HarmonyOS 6 public beta launched, listed below are all of the units getting it

What’s Taking place Between ETH And The Monetary Programs?

XELA Robotics to indicate tactile sensing on the 2026 Robotics Summit & Expo

CME Group Goes Dwell With 24/7 Crypto Futures And Choices, Launches Bitcoin Volatility Contracts

Robert “Kool” Bell Has Le Kool Champagne And Kool King Coconut Water

Why robotics groups want digital gyms earlier than deployment

Prescription Drug Costs Fell Whereas Hospital Prices Rose—What the Might CPI Means for Seniors

Beatbot AquaSense X Evaluate: A Pool Robotic That Cleans Itself

Bringing Ode Poetry to life with MAI’s audio fashions

Chainlink CCIP Comes To Arbitrum Orbit As Layer-3 Builders Chase Safer Messaging

I modified 5 settings and my Moto Buds 2 Plus immediately sounded higher

Leave a ReplyCancel reply

Log In

Sign In

Forgot password?

Your password reset link appears to be invalid or expired.

Log in

Privacy Policy

Add to Collection

No Collections