Tinker Client
The Tinker Client is suitable for scenarios with existing Tinker training code. After initializing with init_tinker_client, it patches the Tinker SDK to point to the Twinkle Server, and the rest of the code can directly reuse existing Tinker training code.
Initialization
import os

# Initialize the Tinker client BEFORE importing ServiceClient:
# init_tinker_client() patches the Tinker SDK to point to the Twinkle Server.
from twinkle import init_tinker_client

init_tinker_client()

# Use ServiceClient directly from tinker
from tinker import ServiceClient

service_client = ServiceClient(
    base_url='http://localhost:8000',  # Server address
    api_key=os.environ.get('MODELSCOPE_TOKEN'),  # Recommended: set to ModelScope Token
)

# Verify connection: list the models available on the Server
for item in service_client.get_server_capabilities().supported_models:
    print(f'- {item.model_name}')
What does init_tinker_client do?
When calling init_tinker_client, the following operations are automatically executed:
- Patch Tinker SDK: Bypass Tinker’s `tinker://` prefix validation, allowing it to connect to standard HTTP addresses.
- Set Request Headers: Inject necessary authentication headers such as `X-Ray-Serve-Request-Id` and `Authorization`.
After initialization, simply run `from tinker import ServiceClient` to connect to the Twinkle Server; all existing Tinker training code can then be used directly without any modifications.
Complete Training Example
Note:
`DataLoader` and `Dataset` in Tinker compatible mode only support local `twinkle` imports; `twinkle_client` is not supported.
import os

import numpy as np
from tqdm import tqdm
from tinker import types
from twinkle import init_tinker_client
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.preprocessor import SelfCognitionProcessor
from twinkle.server.common import input_feature_to_datum

# Step 1: Initialize Tinker client before importing ServiceClient
init_tinker_client()

from tinker import ServiceClient

base_model = 'Qwen/Qwen3.5-4B'
base_url = 'http://localhost:8000'
# Prefer a real ModelScope token when one is exported (a valid token is
# required to publish checkpoints); otherwise fall back to the placeholder key.
api_key = os.environ.get('MODELSCOPE_TOKEN', 'EMPTY_API_KEY')

# Step 2: Prepare dataset
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))
dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=256)
dataset.map(SelfCognitionProcessor('twinkle model', 'ModelScope Team'), load_from_cache_file=False)
dataset.encode(batched=True, load_from_cache_file=False)
dataloader = DataLoader(dataset=dataset, batch_size=8)

# Step 3: Initialize training client
service_client = ServiceClient(base_url=base_url, api_key=api_key)
# Create LoRA training client (rank=16 specifies the LoRA adapter rank)
training_client = service_client.create_lora_training_client(base_model=base_model, rank=16)

# Step 4: Training loop
for epoch in range(3):
    print(f'Epoch {epoch}')
    for batch in tqdm(dataloader):
        # Convert Twinkle's InputFeature to Tinker's Datum format
        input_datum = [input_feature_to_datum(input_feature) for input_feature in batch]

        # Send data to Server: forward + backward propagation
        fwdbwd_future = training_client.forward_backward(input_datum, 'cross_entropy')
        # Optimizer step: update model weights with Adam
        optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4))

        # Both requests are submitted before waiting, then resolved here
        fwdbwd_result = fwdbwd_future.result()
        optim_result = optim_future.result()

        # Compute weighted average log-loss per token for monitoring
        logprobs = np.concatenate([output['logprobs'].tolist() for output in fwdbwd_result.loss_fn_outputs])
        weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in input_datum])
        print(f'Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}')
        print(f'Training Metrics: {optim_result}')

    # Save a checkpoint after each epoch
    save_future = training_client.save_state(f'twinkle-lora-{epoch}')
    save_result = save_future.result()
    print(f'Saved checkpoint to {save_result.path}')
Inference Sampling
Tinker compatible mode supports inference sampling functionality (Server needs to have Sampler service configured).
Sampling from Training
After training is complete, you can directly create a sampling client from the training client:
# NOTE: this fragment continues from the training example: it reuses
# `training_client` and `types`, and assumes a `tokenizer` matching the base
# model has already been created (not shown here).

# Save current weights and create sampling client
sampling_client = training_client.save_weights_and_get_sampling_client(name='my-model')

# Prepare inference input
prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:"))
params = types.SamplingParams(
    max_tokens=20,  # Maximum number of tokens to generate
    temperature=0.0,  # Greedy sampling (deterministic output)
    stop=["\n"],  # Stop when encountering newline
)

# Generate multiple completions
result = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8).result()
for i, seq in enumerate(result.sequences):
    print(f"{i}: {tokenizer.decode(seq.tokens)}")
Sampling from Checkpoint
You can also load saved checkpoints for inference:
import os

from tinker import types
from twinkle import init_tinker_client
from twinkle.data_format import Message, Trajectory
from twinkle.template import Template

# Initialize Tinker client before importing ServiceClient
init_tinker_client()

from tinker import ServiceClient

base_model = 'Qwen/Qwen3.5-4B'
base_url = 'http://localhost:8000'
# Prefer a real ModelScope token when one is exported; otherwise fall back to
# the placeholder key.
api_key = os.environ.get('MODELSCOPE_TOKEN', 'EMPTY_API_KEY')

service_client = ServiceClient(base_url=base_url, api_key=api_key)

# Create sampling client from saved checkpoint
sampling_client = service_client.create_sampling_client(
    model_path='twinkle://run_id/weights/checkpoint_name',  # twinkle:// path of the checkpoint
    base_model=base_model,
)

# Use Twinkle's Template to build multi-turn dialogue input
template = Template(model_id=f'ms://{base_model}')
trajectory = Trajectory(
    messages=[
        Message(role='system', content='You are a helpful assistant'),
        Message(role='user', content='What is your name?'),
    ]
)
# batch_encode takes a list of trajectories; take the single encoded feature
input_feature = template.batch_encode([trajectory], add_generation_prompt=True)[0]
input_ids = input_feature['input_ids'].tolist()
prompt = types.ModelInput.from_ints(input_ids)

params = types.SamplingParams(
    max_tokens=50,  # Maximum number of tokens to generate
    temperature=0.2,  # Low temperature, more focused answers
)

# Generate multiple completions
print('Sampling...')
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8)
result = future.result()

# Decode and print each response
print('Responses:')
for i, seq in enumerate(result.sequences):
    print(f'{i}: {template.decode(seq.tokens)!r}')
Publishing Checkpoint to ModelScope Hub
After training is complete, you can publish checkpoints to ModelScope Hub through the REST client:
# Obtain a REST client from the already-initialized service client.
# Publishing needs a valid ModelScope token as the api_key the client was
# initialized with.
rest_client = service_client.create_rest_client()

# Publish the checkpoint identified by its tinker path, then wait for the
# request to finish before reporting success.
publish_future = rest_client.publish_checkpoint_from_tinker_path(save_result.path)
publish_future.result()
print("Published checkpoint to ModelScope Hub")