diff --git a/documentation/example_conf/llamacpp.conf b/documentation/example_conf/llamacpp.conf
new file mode 100644
index 0000000..53c5580
--- /dev/null
+++ b/documentation/example_conf/llamacpp.conf
@@ -0,0 +1,17 @@
+{
+    "single" :
+    {
+        "framework" : "llamacpp",
+        "model" : ["rocket-3b.Q4_0.gguf"],
+        "input_info" : [
+            {
+                "format" : "flexible"
+            }
+        ],
+        "output_info" : [
+            {
+                "format" : "flexible"
+            }
+        ]
+    }
+}
diff --git a/ml_inference_offloading/src/main/assets/models/README.md b/ml_inference_offloading/src/main/assets/models/README.md
index 49ca3db..c751fd2 100644
--- a/ml_inference_offloading/src/main/assets/models/README.md
+++ b/ml_inference_offloading/src/main/assets/models/README.md
@@ -4,6 +4,12 @@
 
 ### yolov8s_float32
 
+### llamacpp
+
+To run a llamacpp model, copy a GGUF (`.gguf`) file into this directory.
+You can download a small LLM in GGUF format, such as rocket-3B, [here](https://huggingface.co/TheBloke/rocket-3B-GGUF).
+To enable the optimized GEMM/GEMV kernels in the [prebuilt libraries](https://github.com/nnstreamer/nnstreamer-android-resource), use a model converted from Q4_0 to one of the Q4_0_x_x formats.
+
 ### llama2c
 
 To run llama2c model, copy model.bin and tokenizer.bin file into this directory.
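For reference, the download-and-copy step described in the README hunk above could look like the sketch below. It assumes the Q4_0 filename referenced in `llamacpp.conf` and the standard Hugging Face `resolve/main` URL pattern; verify the exact filename on the model page before relying on it.

```sh
# Download the Q4_0 GGUF build of rocket-3B from Hugging Face
# (URL pattern assumed; check the model page for the exact filename).
wget https://huggingface.co/TheBloke/rocket-3B-GGUF/resolve/main/rocket-3b.Q4_0.gguf

# Place the file in the app's assets directory so it is bundled
# into the APK at build time.
cp rocket-3b.Q4_0.gguf ml_inference_offloading/src/main/assets/models/
```

This covers only the bundled-asset route; deploying the model to a device at run time instead would follow the app's own loading path.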