"""
Test Client for Gemma 3n GGUF Backend
Demonstrates the complete integration working
"""
| |
|
import requests
import json
import time
| |
|
def test_gemma_backend():
    """Run an end-to-end smoke test against the Gemma 3n GGUF backend.

    Exercises four endpoints of the locally running backend
    (http://localhost:8000):

      1. GET  /health               -- service health/metadata
      2. GET  /                     -- root service info
      3. POST /v1/chat/completions  -- single-turn chat completion
      4. POST /v1/chat/completions  -- multi-turn conversation

    Returns:
        bool: True when the required checks (1-3) pass. A failure in the
        multi-turn check (4) is reported but does not fail the run,
        preserving the script's original behavior.
    """
    base_url = "http://localhost:8000"
    # Without an explicit timeout, requests can block forever on a hung
    # backend; generous bound since local LLM inference can be slow.
    timeout = 120

    print("[TEST] Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    # --- 1. Health endpoint ------------------------------------------------
    print("\n1. Testing Health Endpoint")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        response.raise_for_status()  # surface HTTP errors instead of a KeyError below
        health_data = response.json()
        print(f"[OK] Health Status: {health_data['status']}")
        print(f"     Model: {health_data['model']}")
        print(f"     Backend: {health_data['backend']}")
        print(f"     Version: {health_data['version']}")
    except Exception as e:
        print(f"[FAIL] Health check failed: {e}")
        return False

    # --- 2. Root info endpoint ---------------------------------------------
    print("\n2. Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=timeout)
        response.raise_for_status()
        root_data = response.json()
        print(f"[OK] Service: {root_data['message']}")
        print(f"     Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"     Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
    except Exception as e:
        print(f"[FAIL] Root info failed: {e}")
        return False

    # --- 3. Single-turn chat completion --------------------------------------
    print("\n3. Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }

    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=timeout,
        )
        end_time = time.time()

        if response.status_code == 200:
            chat_data = response.json()
            print("[OK] Chat completion successful!")
            print(f"     Response time: {end_time - start_time:.2f}s")
            print(f"     Model: {chat_data['model']}")
            print(f"     Completion ID: {chat_data['id']}")

            # OpenAI-compatible response shape: choices[0].message.content
            assistant_message = chat_data['choices'][0]['message']['content']
            print("\nAssistant Response:")
            print(f"   {assistant_message}")
            print(f"Finish Reason: {chat_data['choices'][0]['finish_reason']}")
        else:
            print(f"[FAIL] Chat completion failed with status: {response.status_code}")
            print(f"       Response: {response.text}")
            return False

    except Exception as e:
        print(f"[FAIL] Chat completion failed: {e}")
        return False

    # --- 4. Multi-turn conversation (best-effort; does not fail the run) -----
    print("\n4. Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"},
        ],
        "max_tokens": 100,
        "temperature": 0.5,
    }

    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=timeout,
        )

        if response.status_code == 200:
            chat_data = response.json()
            print("[OK] Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            # Truncate long completions for readable console output.
            print(f"     Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"[FAIL] Multi-turn failed with status: {response.status_code}")

    except Exception as e:
        print(f"[FAIL] Multi-turn conversation failed: {e}")

    print("\n" + "=" * 50)
    print("Gemma 3n GGUF Backend Integration Test Complete!")
    print("[OK] Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")

    return True
| |
|
| | if __name__ == "__main__": |
| | print("π Starting Gemma 3n Integration Test...") |
| | print("π Make sure the backend is running: python3 gemma_gguf_backend.py") |
| | print("β³ Waiting 2 seconds for you to start the backend if needed...") |
| | time.sleep(2) |
| | |
| | success = test_gemma_backend() |
| | |
| | if success: |
| | print("\nπ― Integration Summary:") |
| | print(" β
Backend is running correctly") |
| | print(" β
OpenAI-compatible API working") |
| | print(" β
Gemma 3n model integration successful") |
| | print(" β
Ready for production use!") |
| | else: |
| | print("\nβ Some tests failed. Check the backend logs.") |
| | print("π‘ Make sure to run: python3 gemma_gguf_backend.py") |
| |
|