diff --git a/README.md b/README.md index 642183cc5a2135dde16e6c8d6cc919f1ab53853b..8e7d53711fed34fdb99eeb37abb490e80a597bd3 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ | CSPResNet50 | FP16 | [✅](models/cv/classification/cspresnet50/igie) | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | | CSPResNeXt50 | FP16 | [✅](models/cv/classification/cspresnext50/igie) | [✅](models/cv/classification/cspresnext50/ixrt) | 4.3.0 | +| DeiT-B | FP16 | | [✅](models/cv/classification/deit_b/ixrt) | 4.4.0 | | DeiT-tiny | FP16 | [✅](models/cv/classification/deit_tiny/igie) | [✅](models/cv/classification/deit_tiny/ixrt) | 4.3.0 | | DenseNet121 | FP16 | [✅](models/cv/classification/densenet121/igie) | [✅](models/cv/classification/densenet121/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/densenet121/igie) | | 4.4.0 | @@ -152,6 +153,7 @@ | MNASNet0_75 | FP16 | [✅](models/cv/classification/mnasnet0_75/igie) | | 4.3.0 | | MNASNet1_0 | FP16 | [✅](models/cv/classification/mnasnet1_0/igie) | | 4.3.0 | | MNASNet1_3 | FP16 | [✅](models/cv/classification/mnasnet1_3/igie) | | 4.3.0 | +| MobileNetV1 | FP16 | | [✅](models/cv/classification/mobilenet_v1/ixrt) | 4.4.0 | | MobileNetV2 | FP16 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | MobileNetV3_Large | FP16 | [✅](models/cv/classification/mobilenet_v3_large/igie) | | 4.3.0 | @@ -211,7 +213,7 @@ | | INT8 | [✅](models/cv/classification/vgg16/igie) | | 4.3.0 | | VGG19 | FP16 | [✅](models/cv/classification/vgg19/igie) | | 4.3.0 | | VGG19_BN | FP16 | [✅](models/cv/classification/vgg19_bn/igie) | | 4.3.0 | -| ViT | FP16 | [✅](models/cv/classification/vit/igie) | | 4.3.0 | +| ViT | FP16 | [✅](models/cv/classification/vit/igie) | [✅](models/cv/classification/vit/ixrt) | 
4.3.0 | | ViT-B-32 | FP16 | [✅](models/cv/classification/vit_b_32/igie) | | 4.4.0 | | ViT-L-14 | FP16 | [✅](models/cv/classification/vit_l_14/igie) | | 4.4.0 | | Wide ResNet50 | FP16 | [✅](models/cv/classification/wide_resnet50/igie) | [✅](models/cv/classification/wide_resnet50/ixrt) | 4.3.0 | @@ -293,9 +295,10 @@ | Model | Prec. | IGIE | ixRT | IXUCA SDK | |---------------|-------|---------------------------------------|---------------------------------------|-----------| +| CRNN | FP16 | | [✅](models/cv/ocr/crnn/ixrt) | 4.4.0 | +| DBNet | FP16 | | [✅](models/cv/ocr/dbnet/ixrt) | 4.4.0 | | Kie_layoutXLM | FP16 | [✅](models/cv/ocr/kie_layoutxlm/igie) | | 4.3.0 | | SVTR | FP16 | [✅](models/cv/ocr/svtr/igie) | | 4.3.0 | -| CRNN | FP16 | | [✅](models/cv/ocr/crnn/ixrt) | 4.4.0 | #### 姿态估计 @@ -316,6 +319,7 @@ | Model | Prec. | IGIE | ixRT | IXUCA SDK | |-------|-------|------------------------------------------------|------------------------------------------------|-----------| +| DDRNet | FP16 | | [✅](models/cv/semantic_segmentation/ddrnet/ixrt) | 4.4.0 | | UNet | FP16 | [✅](models/cv/semantic_segmentation/unet/igie) | [✅](models/cv/semantic_segmentation/unet/ixrt) | 4.3.0 | #### 多目标跟踪 @@ -377,6 +381,7 @@ | Model | Prec. | IGIE | ixRT | IXUCA SDK | |-----------------|-------|-----------------------------------------------------|-----------------------------------------------------------|-----------| | Conformer | FP16 | [✅](models/audio/speech_recognition/conformer/igie) | [✅](models/audio/speech_recognition/conformer/ixrt) | 4.3.0 | +| DeepSpeech2 | FP16 | | [✅](models/audio/speech_recognition/deepspeech2/ixrt) | 4.4.0 | | Transformer ASR | FP16 | | [✅](models/audio/speech_recognition/transformer_asr/ixrt) | 4.2.0 | ### 其他 diff --git a/README_en.md b/README_en.md index af9c0e6e2ff3c5ff46286591c8630d124f0ba2c0..0b55d2b038a0e7c4da557767ab61a3ae3e5519a5 100644 --- a/README_en.md +++ b/README_en.md @@ -111,6 +111,7 @@ inference to be expanded in the future. 
| CSPResNet50 | FP16 | [✅](models/cv/classification/cspresnet50/igie) | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | | CSPResNeXt50 | FP16 | [✅](models/cv/classification/cspresnext50/igie) | [✅](models/cv/classification/cspresnext50/ixrt) | 4.3.0 | +| DeiT-B | FP16 | | [✅](models/cv/classification/deit_b/ixrt) | 4.4.0 | | DeiT-tiny | FP16 | [✅](models/cv/classification/deit_tiny/igie) | [✅](models/cv/classification/deit_tiny/ixrt) | 4.3.0 | | DenseNet121 | FP16 | [✅](models/cv/classification/densenet121/igie) | [✅](models/cv/classification/densenet121/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/densenet121/igie) | | 4.4.0 | @@ -144,6 +145,7 @@ inference to be expanded in the future. | MNASNet0_75 | FP16 | [✅](models/cv/classification/mnasnet0_75/igie) | | 4.3.0 | | MNASNet1_0 | FP16 | [✅](models/cv/classification/mnasnet1_0/igie) | | 4.3.0 | | MNASNet1_3 | FP16 | [✅](models/cv/classification/mnasnet1_3/igie) | | 4.3.0 | +| MobileNetV1 | FP16 | | [✅](models/cv/classification/mobilenet_v1/ixrt) | 4.4.0 | | MobileNetV2 | FP16 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | MobileNetV3_Large | FP16 | [✅](models/cv/classification/mobilenet_v3_large/igie) | | 4.3.0 | @@ -203,7 +205,7 @@ inference to be expanded in the future. 
| | INT8 | [✅](models/cv/classification/vgg16/igie) | | 4.3.0 | | VGG19 | FP16 | [✅](models/cv/classification/vgg19/igie) | | 4.3.0 | | VGG19_BN | FP16 | [✅](models/cv/classification/vgg19_bn/igie) | | 4.3.0 | -| ViT | FP16 | [✅](models/cv/classification/vit/igie) | | 4.3.0 | +| ViT | FP16 | [✅](models/cv/classification/vit/igie) | [✅](models/cv/classification/vit/ixrt) | 4.3.0 | | ViT-B-32 | FP16 | [✅](models/cv/classification/vit_b_32/igie) | | 4.4.0 | | ViT-L-14 | FP16 | [✅](models/cv/classification/vit_l_14/igie) | | 4.4.0 | | Wide ResNet50 | FP16 | [✅](models/cv/classification/wide_resnet50/igie) | [✅](models/cv/classification/wide_resnet50/ixrt) | 4.3.0 | @@ -284,9 +286,10 @@ inference to be expanded in the future. | Model | Prec. | IGIE | ixRT | IXUCA SDK | |---------------|-------|---------------------------------------|---------------------------------------|-----------| +| CRNN | FP16 | | [✅](models/cv/ocr/crnn/ixrt) | 4.4.0 | +| DBNet | FP16 | | [✅](models/cv/ocr/dbnet/ixrt) | 4.4.0 | | Kie_layoutXLM | FP16 | [✅](models/cv/ocr/kie_layoutxlm/igie) | | 4.3.0 | | SVTR | FP16 | [✅](models/cv/ocr/svtr/igie) | | 4.3.0 | -| CRNN | FP16 | | [✅](models/cv/ocr/crnn/ixrt) | 4.4.0 | #### Pose Estimation @@ -307,6 +310,7 @@ inference to be expanded in the future. | Model | Prec. | IGIE | ixRT | IXUCA SDK | |-------|-------|------------------------------------------------|------------------------------------------------|-----------| +| DDRNet | FP16 | | [✅](models/cv/semantic_segmentation/ddrnet/ixrt) | 4.4.0 | | UNet | FP16 | [✅](models/cv/semantic_segmentation/unet/igie) | [✅](models/cv/semantic_segmentation/unet/ixrt) | 4.3.0 | #### Multi-Object Tracking @@ -367,6 +371,7 @@ inference to be expanded in the future. | Model | Prec. 
| IGIE | ixRT | IXUCA SDK | |-----------------|-------|-----------------------------------------------------|-----------------------------------------------------------|-----------| | Conformer | FP16 | [✅](models/audio/speech_recognition/conformer/igie) | [✅](models/audio/speech_recognition/conformer/ixrt) | 4.3.0 | +| DeepSpeech2 | FP16 | | [✅](models/audio/speech_recognition/deepspeech2/ixrt) | 4.4.0 | | Transformer ASR | FP16 | | [✅](models/audio/speech_recognition/transformer_asr/ixrt) | 4.2.0 | ### Others diff --git a/models/cv/classification/deit_b/ixrt/README.md b/models/cv/classification/deit_b/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bc315f978fe6349cc64ec5462ab82ef63381bc50 --- /dev/null +++ b/models/cv/classification/deit_b/ixrt/README.md @@ -0,0 +1,64 @@ +# DeiT-Base (ixRT) + +## Model Description + +DeiT-Base (Data-efficient Image Transformer Base) is a vision transformer model that uses knowledge distillation to achieve competitive performance with fewer training resources. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Download the [imagenet](https://www.image-net.org/download.php) to download the validation dataset. 
+ +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt +``` + +### Model Conversion + +```bash +mkdir checkpoints +cd checkpoints +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/deit_b.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/DEIT_B_CONFIG +``` + +### FP16 + +```bash +# Test ACC +bash scripts/infer_deit_b_fp16_accuracy.sh +# Test FPS +bash scripts/infer_deit_b_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | ------- | -------- | -------- | +| DeiT-Base | 32 | FP16 | 596.381 | 81.7 | 95.6 | diff --git a/models/cv/classification/deit_b/ixrt/ci/prepare.sh b/models/cv/classification/deit_b/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..249358b92e91857270d0b16bb2764aa2fdfea99b --- /dev/null +++ b/models/cv/classification/deit_b/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install tqdm onnxsim opencv-python==4.6.0.66 + +mkdir -p checkpoints +cp /root/data/checkpoints/deit_b.onnx checkpoints/ \ No newline at end of file diff --git a/models/cv/classification/deit_b/ixrt/scripts/infer_deit_b_fp16_accuracy.sh b/models/cv/classification/deit_b/ixrt/scripts/infer_deit_b_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..e8c90b6873cc34410d9db6ddf29ba15371a1bd66 --- /dev/null +++ b/models/cv/classification/deit_b/ixrt/scripts/infer_deit_b_fp16_accuracy.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +BSZ=32 +TGT=0.796 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} || exit 1 + echo " "Generate ${SIM_MODEL} +fi + +# Refine Model +let step++ +echo; +echo [STEP ${step}] : Refine Model +REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx +if [ -f ${REFINE_MODEL} ];then + echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed +else + python3 ${RUN_DIR}/refine_model.py \ + --onnx_path ${SIM_MODEL} \ + --dst_onnx_path ${REFINE_MODEL} \ + --bsz ${BSZ} \ + --imgsz ${IMGSIZE} || exit 1 +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${REFINE_MODEL} \ + --output_model ${FINAL_MODEL} || exit 1 + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP 
${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} || exit 1 + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/deit_b/ixrt/scripts/infer_deit_b_fp16_performance.sh b/models/cv/classification/deit_b/ixrt/scripts/infer_deit_b_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..9d41dafae6f08256c305d1ecaf32ff824973239d --- /dev/null +++ b/models/cv/classification/deit_b/ixrt/scripts/infer_deit_b_fp16_performance.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +BSZ=32 +TGT=0.796 +WARM_UP=10 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Refine Model +let step++ +echo; +echo [STEP ${step}] : Refine Model +REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx +if [ -f ${REFINE_MODEL} ];then + echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed +else + python3 ${RUN_DIR}/refine_model.py \ + --onnx_path ${SIM_MODEL} \ + --dst_onnx_path ${REFINE_MODEL} \ + --bsz ${BSZ} \ + --imgsz ${IMGSIZE} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${REFINE_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine 
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/ixrt_common/config/DEIT_B_CONFIG b/models/cv/classification/ixrt_common/config/DEIT_B_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..4170a28b9a0ff4d8518aee4cbe302a3aec2375f6 --- /dev/null +++ b/models/cv/classification/ixrt_common/config/DEIT_B_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=Deit_base +ORIGINE_MODEL=deit_b.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/classification/ixrt_common/config/MOBILENET_V1_CONFIG b/models/cv/classification/ixrt_common/config/MOBILENET_V1_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..c49b9b6a30c346b64106ea68609152fee1ca0c32 --- /dev/null +++ b/models/cv/classification/ixrt_common/config/MOBILENET_V1_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=MobileNet_v1 +ORIGINE_MODEL=mobilenet_v1.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST="fc7 prob" +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/classification/ixrt_common/config/VIT_CONFIG b/models/cv/classification/ixrt_common/config/VIT_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..0e46d9e9e2e86b0f3051593584b70ca77b5360bf --- /dev/null +++ b/models/cv/classification/ixrt_common/config/VIT_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=vit_b_16 +ORIGINE_MODEL=vit_b_16_sim.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=minmax +QUANT_BATCHSIZE=32 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v1/ixrt/README.md b/models/cv/classification/mobilenet_v1/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6451f4cb4b73cf03daa4223fb9facd35504804cb --- /dev/null +++ b/models/cv/classification/mobilenet_v1/ixrt/README.md @@ -0,0 +1,74 @@ +# MobileNetV1 (ixRT) + +## Model Description + +MobileNetV1 is a efficient model architecture using depthwise separable convolutions. It is designed to efficiently maximize accuracy while being mindful of the tight resource constraints. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Download the [imagenet](https://www.image-net.org/download.php) to download the validation dataset. 
+ +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt +``` + +### Model Conversion + +```bash +mkdir checkpoints +cd checkpoints +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/mobilenet_v1.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/MOBILENET_V1_CONFIG +``` + +### FP16 + +```bash +# Test ACC +bash scripts/infer_mobilenet_v1_fp16_accuracy.sh +# Test FPS +bash scripts/infer_mobilenet_v1_fp16_performance.sh +``` + +### INT8 + +```bash +# Test ACC +bash scripts/infer_mobilenet_v1_int8_accuracy.sh +# Test FPS +bash scripts/infer_mobilenet_v1_int8_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | ------- | -------- | -------- | +| MobileNetV1 | 32 | FP16 | 13862.317 | 71.6 | 90.3 | +| MobileNetV1 | 32 | INT8 | 17485.601 | 70.9 | 89.9 | \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v1/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v1/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..02b34ea03d1e2d971ba1bb5de584df38cf41b553 --- /dev/null +++ b/models/cv/classification/mobilenet_v1/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install tqdm onnxsim opencv-python==4.6.0.66 + +mkdir -p checkpoints +cp /root/data/checkpoints/mobilenet_v1.onnx checkpoints/ \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_fp16_accuracy.sh b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..ffe54388de37b72d2a7a6282d5c388842b19cd95 --- /dev/null +++ b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_fp16_accuracy.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize 
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_fp16_performance.sh b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..43a0f216ac9884236f12bf5358c200a59ce290b3 --- /dev/null +++ b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_fp16_performance.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + 
--observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_int8_accuracy.sh b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..ab3292a656efc48b156509bb121a6104f948c81f --- /dev/null +++ b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_int8_accuracy.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=int8 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, 
${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_int8_performance.sh b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..a8c63eab8a24ff4681bcfd94c44ed73971781dd6 --- /dev/null +++ 
b/models/cv/classification/mobilenet_v1/ixrt/scripts/infer_mobilenet_v1_int8_performance.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=int8 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z 
${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/vit/ixrt/README.md b/models/cv/classification/vit/ixrt/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..e0e7fb7e0a201359b59fb4c0b5c97e6ece45bfbf --- /dev/null +++ b/models/cv/classification/vit/ixrt/README.md @@ -0,0 +1,64 @@ +# Vision Transformer (ViT) (ixRT) + +## Model Description + +Vision Transformer (ViT) applies a pure transformer to images without any convolution. It divides an image into patches and processes them through transformer layers. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Download the [imagenet](https://www.image-net.org/download.php) to download the validation dataset. + +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt +``` + +### Model Conversion + +```bash +mkdir checkpoints +cd checkpoints +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/vit_b_16_sim.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/VIT_CONFIG +``` + +### FP16 + +```bash +# Test ACC +bash scripts/infer_vit_fp16_accuracy.sh +# Test FPS +bash scripts/infer_vit_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | ------- | -------- | -------- | +| ViT-B/16 | 32 | FP16 | 461.038 | 81.1 | 95.3 | \ No newline at end of file diff --git a/models/cv/classification/vit/ixrt/ci/prepare.sh b/models/cv/classification/vit/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..c12ce37bf8fca6326d708692c5cbe58941999e97 --- /dev/null +++ 
b/models/cv/classification/vit/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install tqdm onnxsim opencv-python==4.6.0.66 + +mkdir -p checkpoints +cp /root/data/checkpoints/vit_b_16_sim.onnx checkpoints/ \ No newline at end of file diff --git a/models/cv/classification/vit/ixrt/scripts/infer_vit_fp16_accuracy.sh b/models/cv/classification/vit/ixrt/scripts/infer_vit_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..1d6c116c77a8f943c638a37b6cb232b220c906ca --- /dev/null +++ b/models/cv/classification/vit/ixrt/scripts/infer_vit_fp16_accuracy.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +set -euo pipefail + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + echo "fails" + EXIT_STATUS=1 + fi +} +# Run paraments +warm_up=10 +BSZ=32 +TGT=-1 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +datasets_dir=${DATASETS_DIR} +onnx_model=${CHECKPOINTS_DIR}/vit_b_16_sim.onnx +engine_file=${CHECKPOINTS_DIR}/vit_b_16.engine + +echo "Build Fp16 Engine!" +python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${onnx_model} \ + --engine ${engine_file}; check_status + +echo "Fp16 Inference Acc!" +python3 ${RUN_DIR}/inference.py \ + --test_mode ACC \ + --engine_file ${engine_file} \ + --datasets_dir ${datasets_dir} \ + --warm_up ${warm_up} \ + --bsz ${BSZ} \ + --acc_target ${TGT}; check_status \ No newline at end of file diff --git a/models/cv/classification/vit/ixrt/scripts/infer_vit_fp16_performance.sh b/models/cv/classification/vit/ixrt/scripts/infer_vit_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..ea1f93cbdc474cf6b0a59f8b779f292381ae646c --- /dev/null +++ b/models/cv/classification/vit/ixrt/scripts/infer_vit_fp16_performance.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +set -euo pipefail + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + echo "fails" + EXIT_STATUS=1 + fi +} +# Run paraments +warm_up=10 +loop_count=50 +BSZ=32 +TGT=-1 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +datasets_dir=${DATASETS_DIR} +onnx_model=${CHECKPOINTS_DIR}/vit_b_16_sim.onnx +engine_file=${CHECKPOINTS_DIR}/vit_b_16.engine + +echo "Build Fp16 Engine!" +python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${onnx_model} \ + --engine ${engine_file}; check_status + +echo "Fp16 Inference Fps!" +python3 ${RUN_DIR}/inference.py \ + --test_mode FPS \ + --engine_file ${engine_file} \ + --datasets_dir ${datasets_dir} \ + --warm_up ${warm_up} \ + --bsz ${BSZ} \ + --loop_count ${loop_count} \ + --fps_target ${TGT}; check_status \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/README.md b/models/cv/ocr/dbnet/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dccaaebfb0666bcda4e005d30a1faed16d337306 --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/README.md @@ -0,0 +1,67 @@ +# DBNet (ixRT) + +## Model Description + +DBNet (Differentiable Binarization Network) is a scene text detection model that uses a differentiable binarization process for robust text detection. 
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained models: +- r50_en_dbnet: + +Dataset: ICDAR 2015 + +### Install Dependencies + +```bash +pip3 install shapely pyclipper opencv-python==4.6.0.66 tqdm +``` + +### Model Conversion + +```bash +mkdir checkpoints +cd checkpoints +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/r50_en_dbnet.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/path/to/icdar2015/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Test ACC +bash scripts/infer_dbnet_fp16_accuracy.sh +# Test FPS +bash scripts/infer_dbnet_fp16_performance.sh +``` + +### INT8 + +```bash +# Test ACC +bash scripts/infer_dbnet_int8_accuracy.sh +# Test FPS +bash scripts/infer_dbnet_int8_performance.sh +``` + +## Model Results + +| Model | Backbone | BatchSize | Precision | FPS | Hmean | +| ----------- | -------- | --------- | --------- | ------- | ------- | +| DBNet | r50_en | 32 | FP16 | 143.85 | 0.803 | +| DBNet | r50_en | 32 | INT8 | 143.73 | 0.803 | \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/build_engine.py b/models/cv/ocr/dbnet/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..96c4435dc6445cbe9c9daa299d6bb355c1a3d7ff --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/build_engine.py @@ -0,0 +1,49 @@ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from tensorrt import Dims + + +def main(config): + + input_shape = [args.batch_size,3, 736,1280] + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = 
builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + build_config.set_flag(precision) + if config.precision == "int8": + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + input_tensor = network.get_input(0) + input_tensor.shape = Dims(input_shape) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str,default="wide_deep.onnx") + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default="wide_deep.engine") + parser.add_argument("--batch_size", type=int, default=1) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/ci/prepare.sh b/models/cv/ocr/dbnet/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d2101a9f91f90986bd5828e6940e2ecded6886a1 --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/ci/prepare.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install shapely pyclipper opencv-python==4.6.0.66 tqdm + +mkdir -p checkpoints +cp /root/data/checkpoints/r50_en_dbnet.onnx checkpoints/ \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/dbnet_inference.py b/models/cv/ocr/dbnet/ixrt/dbnet_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..0f3bda1d3c8daab5bc05e7d6c4a47da0a9e13c17 --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/dbnet_inference.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import cv2 +import numpy as np +import argparse +import sys + + +from cuda import cuda, cudart +import torch +import tensorrt + +from util.common import eval_batch, create_engine_context, get_io_bindings + +from util import TextDetector + +def check_target(inference, target): + satisfied = False + if inference > target: + satisfied = True + return satisfied + +process_configs ={ + #pre process config + 'std': [0.229, 0.224, 0.225], + 'mean': [0.485, 0.456, 0.406], + 'scale': 1./255., + 'image_shape':(1280,736),#width height + + #post precess config + 'thresh':0.3, + 'box_thresh':0.5, + 'max_candidates':1000, + 'unclip_ratio':2, + 'use_dilation':False, + 'score_mode':'fast', + 'box_type':'quad', + 'batch_size':1 + +} + +def make_parser(): + parser = argparse.ArgumentParser("DBnet Eval") + parser.add_argument("--datasets_dir", type=str, default="data/icdar_2015_images", help="datasets dir ") + parser.add_argument("--engine_file", type=str, default="data/unit_test_r50_en_dbnet_bin/int8_r50_en_dbnet.engine", help="weights dir") + + 
parser.add_argument("-b", "--batch_size", type=int, default=1, help="batch size") + parser.add_argument("-d", "--device", default=1, type=int, help="device for val") + parser.add_argument("--img_height", default=736, type=int, help="test img height") + parser.add_argument("--img_width", default=1280, type=int, help="test img width") + parser.add_argument("--target_hmean", default=0.82, type=float, help="target Hmean") + parser.add_argument("--target_fps", default=30, type=float, help="target Hmean") + + parser.add_argument("--target", default="precision", type=str, help="precision or pref") + parser.add_argument("--warm_up", default=20, type=int , help="warm_up") + parser.add_argument("--loop_count", default=100, type=int , help="loop_count") + parser.add_argument("--seed", default=None, type=int, help="eval seed") + return parser + + + + +def eval(args): + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(args.engine_file, logger) + + process_configs["image_dir"]=args.datasets_dir + process_configs["label_dir"] = args.datasets_dir + process_configs["image_shape"]= (args.img_height,args.img_width) + process_configs["batch_size"] = args.batch_size + db_det = TextDetector(engine,context,process_configs) + if args.target=="precision": + metrics = db_det.eval_icdar_2015(args.datasets_dir,args.batch_size) + print("="*40) + print("Precision:{0},Recall:{1},Hmean:{2}".format(round(metrics["precision"],3),round(metrics["recall"],3),round(metrics["hmean"],3))) + print("="*40) + print(f"Check hmean Test : {round(metrics['hmean'],3)} Target:{args.target_hmean} \ + State : {'Pass' if round(metrics['hmean'],3) >= args.target_hmean else 'Fail'}") + status_hmean = check_target(metrics["hmean"], args.target_hmean) + metricResult = {"metricResult": {}} + metricResult["metricResult"]["hmean"] = round(metrics["hmean"], 3) + print(metricResult) + sys.exit(int(not (status_hmean))) + else: + fps = 
db_det.perf(args.warm_up,args.loop_count,args.batch_size) + print("="*40) + print("fps:{0}".format(round(fps,2))) + print("="*40) + print(f"Check fps Test : {round(fps,3)} Target:{args.target_fps} State : {'Pass' if fps >= args.target_fps else 'Fail'}") + status_fps = check_target(fps, args.target_fps) + metricResult = {"metricResult": {}} + metricResult["metricResult"]["fps"] = round(fps, 3) + print(metricResult) + sys.exit(int(not (status_fps))) + +if __name__ == "__main__": + args = make_parser().parse_args() + eval(args) + + + + diff --git a/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_fp16_accuracy.sh b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..8ca2a706b393e95c335092592e7c3831ec2b34d0 --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_fp16_accuracy.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +BSZ=16 +TGT=0.67 +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +DATASETS_DIR="/root/data/datasets/icdar_2015/icdar_2015_images" +CHECKPOINTS_DIR="./checkpoints" +RUN_DIR="${RUN_DIR:-.}" + +python3 ${RUN_DIR}/build_engine.py --model=${CHECKPOINTS_DIR}/r50_en_dbnet.onnx\ + --engine=${CHECKPOINTS_DIR}/float16_r50_en_dbnet.engine\ + --batch_size=${BSZ}\ + --precision="float16" + +python3 ${RUN_DIR}/dbnet_inference.py \ + --datasets_dir ${DATASETS_DIR} \ + --engine_file ${CHECKPOINTS_DIR}/float16_r50_en_dbnet.engine \ + --target "precision" \ + --batch_size ${BSZ} \ + --target_hmean ${TGT}; check_status + + exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_fp16_performance.sh b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..6dcde6c780ce5b513dbdf161dc536a81d7ba7ec8 --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_fp16_performance.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
+ +BSZ=16 +TGT=-1 +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +DATASETS_DIR="/root/data/datasets/icdar_2015/icdar_2015_images" +CHECKPOINTS_DIR="./checkpoints" +RUN_DIR="${RUN_DIR:-.}" + +python3 ${RUN_DIR}/build_engine.py --model=${CHECKPOINTS_DIR}/r50_en_dbnet.onnx\ + --engine=${CHECKPOINTS_DIR}/float16_r50_en_dbnet.engine\ + --batch_size=${BSZ}\ + --precision="float16" + +python3 ${RUN_DIR}/dbnet_inference.py \ + --engine_file ${CHECKPOINTS_DIR}/float16_r50_en_dbnet.engine \ + --target "perf" \ + --batch_size ${BSZ} \ + --target_fps ${TGT};check_status + + exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_int8_accuracy.sh b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..8ca2a706b393e95c335092592e7c3831ec2b34d0 --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_int8_accuracy.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +BSZ=16 +TGT=0.67 +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +DATASETS_DIR="/root/data/datasets/icdar_2015/icdar_2015_images" +CHECKPOINTS_DIR="./checkpoints" +RUN_DIR="${RUN_DIR:-.}" + +python3 ${RUN_DIR}/build_engine.py --model=${CHECKPOINTS_DIR}/r50_en_dbnet.onnx\ + --engine=${CHECKPOINTS_DIR}/int8_r50_en_dbnet.engine\ + --batch_size=${BSZ}\ + --precision="int8" + +python3 ${RUN_DIR}/dbnet_inference.py \ + --datasets_dir ${DATASETS_DIR} \ + --engine_file ${CHECKPOINTS_DIR}/int8_r50_en_dbnet.engine \ + --target "precision" \ + --batch_size ${BSZ} \ + --target_hmean ${TGT}; check_status + + exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_int8_performance.sh b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..701f2496bedb3866d13ec29fa230d072a413519d --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/scripts/infer_dbnet_int8_performance.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
def eval_batch(batch_score, batch_label):
    """Count top-1 / top-5 hits for one batch of classification scores.

    Args:
        batch_score: numpy array of shape (N, num_classes) with per-class
            scores; num_classes must be >= 5 for the topk(5) call.
        batch_label: iterable of N ground-truth class indices.

    Returns:
        (top1, top5): number of samples whose label is the argmax,
        and number whose label is among the five highest-scoring classes.
    """
    # Fix: the original wrapped an existing tensor with torch.tensor(...),
    # forcing an extra copy (and a UserWarning); from_numpy + to() suffices.
    scores = torch.from_numpy(batch_score).to(torch.float32)
    _, indices = scores.topk(5)
    top1, top5 = 0, 0
    for idx, label in enumerate(batch_label):
        if label == indices[idx][0]:
            top1 += 1
        if label in indices[idx]:
            top5 += 1
    return top1, top5
def get_io_bindings(engine):
    """Allocate a device buffer for every binding of a deserialized engine.

    Args:
        engine: engine object exposing the legacy binding API
            (num_bindings / binding_is_input / get_binding_*).

    Returns:
        (inputs, outputs, allocations): binding descriptors (dicts with
        index, name, dtype, shape, allocation, nbytes) split by direction,
        plus the flat list of device pointers in binding order as required
        by context.execute_v2().
    """
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        dtype = engine.get_binding_dtype(i)
        shape = engine.get_binding_shape(i)
        # Byte size of the binding: itemsize * product of all dims.
        # (Removed the original's dead `is_input` flag and the
        # `batch_size = shape[0]` assignment that was never read.)
        size = np.dtype(tensorrt.nptype(dtype)).itemsize
        for s in shape:
            size *= s
        err, allocation = cudart.cudaMalloc(size)
        assert err == cudart.cudaError_t.cudaSuccess
        binding = {
            "index": i,
            "name": name,
            "dtype": np.dtype(tensorrt.nptype(dtype)),
            "shape": list(shape),
            "allocation": allocation,
            "nbytes": size,
        }
        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
        allocations.append(allocation)
        if engine.binding_is_input(i):
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations
    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        Extract polygonal text regions from a binarized map.

        pred: probability map, used only for scoring candidate regions.
        _bitmap: single map with shape (1, H, W),
            whose values are binarized as {0, 1}

        Returns (boxes, scores): polygon vertex lists rescaled to
        (dest_width, dest_height), and their mean-probability scores.
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        boxes = []
        scores = []

        # Outline every connected region of the binary map.
        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours[:self.max_candidates]:
            # Simplify the contour; tolerance is 0.2% of its perimeter.
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                # Fewer than 4 vertices cannot be a text polygon.
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            # NOTE: always true here (points has >= 4 vertices after the
            # guard above); the else branch is unreachable, kept as-is.
            if points.shape[0] > 2:
                box = self.unclip(points, self.unclip_ratio)
                if len(box) > 1:
                    # Unclipping split the region into several paths; drop it.
                    continue
            else:
                continue
            box = box.reshape(-1, 2)

            # Discard boxes whose shorter side is too small to be text.
            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            box = np.array(box)
            # Rescale from bitmap resolution to the destination image size.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores
_bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, + cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + + boxes = [] + scores = [] + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + if self.score_mode == "fast": + score = self.box_score_fast(pred, points.reshape(-1, 2)) + else: + score = self.box_score_slow(pred, contour) + if self.box_thresh > score: + continue + + box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + + box[:, 0] = box[:, 0]-pad_w + box[:, 1] = box[:, 1]-pad_h + + box[:, 0] = np.clip( + np.round(box[:, 0] / scale), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / scale), 0, dest_height) + + # box[:, 0] = np.clip( + # np.round(box[:, 0] / width * dest_width), 0, dest_width) + # box[:, 1] = np.clip( + # np.round(box[:, 1] / height * dest_height), 0, dest_height) + + #box[:, 0] = box[:, 0]+(int)(pad_w*dest_width/width) + #box[:, 1] = box[:, 1]+(int)(pad_h*dest_height/height ) + + + #box[:, 0] = np.clip(np.round(box[:, 0]) , 0, 1280) + #box[:, 1] = np.clip(np.round(box[:, 1]) , 0, 736) + + boxes.append(box.astype("int32")) + scores.append(score) + return np.array(boxes, dtype="int32"), scores + + def unclip(self, box, unclip_ratio): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = 
sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + ''' + box_score_fast: use bbox mean score as the mean score + ''' + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def box_score_slow(self, bitmap, contour): + ''' + box_score_slow: use polyon mean score as the mean score + ''' + h, w = bitmap.shape[:2] + contour = contour.copy() + contour = np.reshape(contour, (-1, 2)) + + xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) + xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) + ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) + ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + + contour[:, 0] = contour[:, 0] - xmin + contour[:, 1] = contour[:, 1] - ymin + + cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, outs_dict, shape_list): + pred = outs_dict['maps'] + # if isinstance(pred, paddle.Tensor): + # pred = pred.numpy() + pred = pred[:, 0, :, :] + 
def img2label_paths(img_paths):
    """Map ICDAR-2015 image paths to their ground-truth label paths.

    ``.../icdar_2015_images/<name>.<ext>`` becomes
    ``.../icdar_2015_labels/gt_<name>.txt``. Only the LAST occurrence of
    the images directory is rewritten; the file extension is replaced
    with ``.txt`` in either case.
    """
    img_dir = f'{os.sep}icdar_2015_images{os.sep}'
    lbl_dir = f'{os.sep}icdar_2015_labels{os.sep}gt_'
    label_paths = []
    for path in img_paths:
        swapped = lbl_dir.join(path.rsplit(img_dir, 1))
        label_paths.append(swapped.rsplit('.', 1)[0] + '.txt')
    return label_paths
def letterbox(im, new_shape=(736, 1280), color=(114, 114, 114), auto=False, scaleup=True, stride=32):
    """Resize and pad `im` to `new_shape` while keeping its aspect ratio.

    Args:
        im: HxWxC image array.
        new_shape: target (height, width), or one int for a square target.
        color: per-channel constant padding value.
        auto: if True, pad only to the next multiple of `stride`
            (minimum rectangle) instead of the full target shape.
        scaleup: if False, never enlarge the image (only shrink).
        stride: stride used by the `auto` minimum-rectangle padding.

    Returns:
        (padded_image, r, dw, dh): the letterboxed image, the scale ratio
        applied, and the per-side horizontal/vertical padding (possibly
        fractional; the actual border uses the rounded values below).
    """
    # Resize and pad image while meeting stride-multiple constraints

    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding. NOTE: new_unpad is (height, width); dw/dh below
    # index it accordingly (dw from widths, dh from heights).
    new_unpad = int(round(shape[0] * r)), int(round(shape[1] * r))
    dw, dh = new_shape[1] - new_unpad[1], new_shape[0] - new_unpad[0]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape != new_unpad:  # resize
        # cv2.resize expects (width, height), hence the [::-1] flip.
        im = cv2.resize(im, new_unpad[::-1], interpolation=cv2.INTER_LINEAR)

    # The -0.1/+0.1 rounding splits an odd padding pixel between the sides.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im1 = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im1, r, dw, dh
    def batch_forward(self, inputs, outputs, allocations, batch_data, shape_list):
        """Run one batch through the engine and post-process the output maps.

        inputs/outputs/allocations: binding descriptors and device pointers
        produced by get_io_bindings(). batch_data is NHWC host data;
        shape_list carries one (h, w, pad_h, pad_w, scale) row per real
        image for DBPostProcess. Returns the post-processed box results.
        """
        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
        # NOTE(review): `input` shadows the builtin of the same name.
        input = np.zeros(inputs[0]["shape"], inputs[0]["dtype"])
        real_batch = batch_data.shape[0]
        # NHWC -> NCHW, cast to the engine's input dtype, make contiguous
        # so the raw H2D copy below sees a flat buffer.
        batch_data = np.transpose(batch_data, [0, 3, 1, 2])
        batch_data = batch_data.astype(inputs[0]["dtype"])
        batch_data = np.ascontiguousarray(batch_data)
        input[:real_batch, :, :, :] = batch_data

        # NOTE(review): only batch_data.nbytes (the real batch) is uploaded,
        # so the zero-padded `input` buffer is never copied; on a partial
        # last batch the tail of the device buffer holds stale data. This
        # looks harmless because DBPostProcess.__call__ iterates only
        # shape_list rows — confirm before reusing this method elsewhere.
        err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes)
        assert (err == cuda.CUresult.CUDA_SUCCESS)
        self.context.execute_v2(allocations)
        err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"])
        assert (err == cuda.CUresult.CUDA_SUCCESS)
        outs_dict = {"maps": output}
        post_result = self.postprocess(outs_dict, shape_list)
        return post_result
eval_icdar_2015(self,img_dir,batch_size): + dataloader = self.get_dataloader(img_dir,batch_size) + label_files =[] + evaluator = DetectionIoUEvaluator() + + inputs, outputs, allocations = get_io_bindings(self.engine) + gts =[] + preds=[] + all_boxes= [] + for i, data in enumerate(tqdm(dataloader,disable=False)): + batch_data, shape_list,batch_label = data + label_files.extend(batch_label) + post_result= self.batch_forward(inputs,outputs,allocations,batch_data,shape_list) + all_boxes.extend(post_result) + print("============start evel=========================") + for i, per_image_boxes in enumerate(all_boxes): + one_pred=[] + dt_boxes = per_image_boxes["points"] + for bbox in dt_boxes: + one_pred_res={} + one_pred_res["points"]=[tuple(x) for x in bbox.tolist()] + one_pred_res["text"]="text" + one_pred_res["ignore"] =False + one_pred.append(one_pred_res) + preds.append(one_pred) + label_file= label_files[i] + one_gt=[] + with open(label_file) as f: + lines = f.readlines() + for line in lines: + one_gt_res={} + line_label=line.strip().split(",")[:9] + x1,y1,x2,y2,x3,y3,x4,y4,label =line_label + gt_bbox= [(int(x1), int(y1)), (int(x2), int(y2)), (int(x3), int(y3)), (int(x4), int(y4))] + one_gt_res["points"]=gt_bbox + one_gt_res["text"]=label + if label=="###": + one_gt_res["ignore"] =True + else: + one_gt_res["ignore"] =False + one_gt.append(one_gt_res) + gts.append(one_gt) + + + results = [] + for gt, pred in zip(gts, preds): + results.append(evaluator.evaluate_image(gt, pred)) + metrics = evaluator.combine_results(results) + return metrics + + def perf(self,warm_up,loop_count,batch_size): + inputs, outputs, allocations = get_io_bindings(self.engine) + if warm_up > 0: + print("\nWarm Start.") + for i in range(warm_up): + self.context.execute_v2(allocations) + print("Warm Done.") + torch.cuda.synchronize() + start_time = time.time() + for i in range(loop_count): + self.context.execute_v2(allocations) + torch.cuda.synchronize() + end_time = time.time() + forward_time = 
end_time - start_time + fps = loop_count * batch_size / forward_time + fps = round(fps,2) + return fps + + + def pre_process(self,img_file): + org_img = cv2.imread(img_file) + image = org_img.copy() + #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + letter_img, r, dw, dh= letterbox(image,self.configs["image_shape"]) + in_img = letter_img.copy() + #image = cv2.resize(image, (1280, 736)) + in_img = in_img.astype(np.float32) + in_img /= 255 + in_img =(in_img-0.456)/0.224 + return letter_img,in_img,org_img, r, dw, dh + + + + + + diff --git a/models/cv/ocr/dbnet/ixrt/util/eval_det_iou.py b/models/cv/ocr/dbnet/ixrt/util/eval_det_iou.py new file mode 100644 index 0000000000000000000000000000000000000000..3b1a1702e66496f15ec8ff967ac8fe90ab7aec4f --- /dev/null +++ b/models/cv/ocr/dbnet/ixrt/util/eval_det_iou.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from collections import namedtuple +import numpy as np +from shapely.geometry import Polygon +import glob +import os +""" +reference from : +https://github.com/MhLiao/DB/blob/3c32b808d4412680310d3d28eeb6a2d5bf1566c5/concern/icdar2015_eval/detection/iou.py#L8 +""" + + + +class DetectionIoUEvaluator(object): + def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5): + self.iou_constraint = iou_constraint + self.area_precision_constraint = area_precision_constraint + + def evaluate_image(self, gt, pred): + def get_union(pD, pG): + return Polygon(pD).union(Polygon(pG)).area + + def get_intersection_over_union(pD, pG): + return get_intersection(pD, pG) / get_union(pD, pG) + + def get_intersection(pD, pG): + return Polygon(pD).intersection(Polygon(pG)).area + + def compute_ap(confList, matchList, numGtCare): + correct = 0 + AP = 0 + if len(confList) > 0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + 
correct += 1 + AP += float(correct) / (n + 1) + + if numGtCare > 0: + AP /= numGtCare + + return AP + + perSampleMetrics = {} + + matchedSum = 0 + + Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') + + numGlobalCareGt = 0 + numGlobalCareDet = 0 + + arrGlobalConfidences = [] + arrGlobalMatches = [] + + recall = 0 + precision = 0 + hmean = 0 + + detMatched = 0 + + iouMat = np.empty([1, 1]) + + gtPols = [] + detPols = [] + + gtPolPoints = [] + detPolPoints = [] + + # Array of Ground Truth Polygons' keys marked as don't Care + gtDontCarePolsNum = [] + # Array of Detected Polygons' matched with a don't Care GT + detDontCarePolsNum = [] + + pairs = [] + detMatchedNums = [] + + arrSampleConfidences = [] + arrSampleMatch = [] + + evaluationLog = "" + + for n in range(len(gt)): + points = gt[n]['points'] + dontCare = gt[n]['ignore'] + if not Polygon(points).is_valid: + continue + + gtPol = points + gtPols.append(gtPol) + gtPolPoints.append(points) + if dontCare: + gtDontCarePolsNum.append(len(gtPols) - 1) + + evaluationLog += "GT polygons: " + str(len(gtPols)) + ( + " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" + if len(gtDontCarePolsNum) > 0 else "\n") + + for n in range(len(pred)): + points = pred[n]['points'] + if not Polygon(points).is_valid: + continue + + detPol = points + detPols.append(detPol) + detPolPoints.append(points) + if len(gtDontCarePolsNum) > 0: + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol, detPol) + pdDimensions = Polygon(detPol).area + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > self.area_precision_constraint): + detDontCarePolsNum.append(len(detPols) - 1) + break + + evaluationLog += "DET polygons: " + str(len(detPols)) + ( + " (" + str(len(detDontCarePolsNum)) + " don't care)\n" + if len(detDontCarePolsNum) > 0 else "\n") + + if len(gtPols) > 0 and len(detPols) > 0: + # Calculate IoU and precision matrixs + 
outputShape = [len(gtPols), len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols), np.int8) + detRectMat = np.zeros(len(detPols), np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if gtRectMat[gtNum] == 0 and detRectMat[ + detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: + if iouMat[gtNum, detNum] > self.iou_constraint: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + detMatched += 1 + pairs.append({'gt': gtNum, 'det': detNum}) + detMatchedNums.append(detNum) + evaluationLog += "Match GT #" + \ + str(gtNum) + " with Det #" + str(detNum) + "\n" + + numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) + numDetCare = (len(detPols) - len(detDontCarePolsNum)) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare > 0 else float(1) + else: + recall = float(detMatched) / numGtCare + precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare + + hmean = 0 if (precision + recall) == 0 else 2.0 * \ + precision * recall / (precision + recall) + + matchedSum += detMatched + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + + perSampleMetrics = { + 'gtCare': numGtCare, + 'detCare': numDetCare, + 'detMatched': detMatched, + } + return perSampleMetrics + + def combine_results(self, results): + numGlobalCareGt = 0 + numGlobalCareDet = 0 + matchedSum = 0 + for result in results: + numGlobalCareGt += result['gtCare'] + numGlobalCareDet += result['detCare'] + matchedSum += result['detMatched'] + + methodRecall = 0 if numGlobalCareGt == 0 else float( + matchedSum) / numGlobalCareGt + methodPrecision = 0 if numGlobalCareDet == 0 else float( + matchedSum) / numGlobalCareDet + methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \ + methodRecall * 
methodPrecision / ( + methodRecall + methodPrecision) + methodMetrics = { + 'precision': methodPrecision, + 'recall': methodRecall, + 'hmean': methodHmean + } + + return methodMetrics + +def read_label(label_file): + #gts = [] + one_gt = [] + with open(label_file) as f: + lines = f.readlines() + + + for line in lines: + one_res={} + cord,label = line.strip().split(",")[:8],line.strip().split(",")[-1] + cord=[int(x) for x in cord] + one_res["points"]=[(cord[0],cord[1]),(cord[2],cord[3]),(cord[4],cord[5]),(cord[6],cord[7])] + one_res["text"]=label + if label=="###": + one_res["ignore"] =True + else: + one_res["ignore"] =False + one_gt.append(one_res) + #gts.append(one_gt) + return one_gt + + + + +if __name__ == '__main__': + evaluator = DetectionIoUEvaluator() + + gt_files = glob.glob("/home/fangjian.hu/workspace/ixrt/test_data/MSRA/test_labels_icdar/*") + pred_path = "/home/fangjian.hu/workspace/ixrt/test_data/MSRA_pred_dt/" + + gts =[] + preds=[] + + for gt_file in gt_files: + label_name = os.path.split(gt_file)[-1] + pred_file = os.path.join(pred_path,label_name) + one_gt= read_label(gt_file) + one_pred= read_label(pred_file) + gts.append(one_gt) + preds.append(one_pred) + + results = [] + for gt, pred in zip(gts, preds): + results.append(evaluator.evaluate_image(gt, pred)) + metrics = evaluator.combine_results(results) + print(metrics) + + + + + + + + + + + + + + # for item in data["label"]: + # print(item) + # if item["transcription"]!="###": + # print(item) + + + + + + + + + + # gts = [[{ + # 'points': [(0, 0), (1, 0), (1, 1), (0, 1)], + # 'text': 1234, + # 'ignore': False, + # }, { + # 'points': [(2, 2), (3, 2), (3, 3), (2, 3)], + # 'text': 5678, + # 'ignore': False, + # }]] + # preds = [[{ + # 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)], + # 'text': 123, + # 'ignore': False, + # }]] + # results = [] + # for gt, pred in zip(gts, preds): + # results.append(evaluator.evaluate_image(gt, pred)) + # metrics = evaluator.combine_results(results) + # print(metrics) 
diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/README.md b/models/cv/semantic_segmentation/ddrnet/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8ce594e2a8341dfb5823e34af80700550a54056f --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/README.md @@ -0,0 +1,66 @@ +# DDRNet (ixRT) + +## Model Description + +DDRNet (Dual Resolution Network) is a real-time semantic segmentation network that learns rich representations through bilateral detail preservation and deep aggregation for high-resolution image understanding. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the dataset. + +### Install Dependencies + +```bash +pip3 install xtcocotools tqdm munkres onnxsim opencv-python==4.6.0.66 +``` + +### Model Conversion + +```bash +mkdir checkpoints +cd checkpoints +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/ddrnet23.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/cityscapes/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Test ACC (mIoU) +bash scripts/infer_ddrnet_fp16_accuracy.sh +# Test FPS +bash scripts/infer_ddrnet_fp16_performance.sh +``` + +### INT8 + +```bash +# Test ACC (mIoU) +bash scripts/infer_ddrnet_int8_accuracy.sh +# Test FPS +bash scripts/infer_ddrnet_int8_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | mIoU(%) | mAcc(%) | +| ------ | --------- | --------- | ------- | ------- | ------- | +| DDRNet | 4 | FP16 | 98.278 | 12.8 | 25.8 | +| DDRNet | 4 | INT8 | 123.94 | 12.9 | 25.6 | \ No newline at end of file diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/build_engine.py 
def load_ixrt_plugin(logger=None, namespace="", dynamic_path=""):
    """Load the ixRT plugin shared library and register its TensorRT ops.

    Args:
        logger: TensorRT logger; a fresh INFO-level logger is created when
            omitted. (Fix: the original instantiated tensorrt.Logger in the
            default argument, which runs at function-definition time — the
            classic mutable/eager default-argument pitfall.)
        namespace: plugin namespace passed to init_libnvinfer_plugins.
        dynamic_path: explicit path to libixrt_plugin.so; defaults to the
            copy shipped inside the tensorrt package.

    Raises:
        FileNotFoundError: if the plugin library cannot be found.
    """
    if logger is None:
        logger = tensorrt.Logger(tensorrt.Logger.INFO)
    if not dynamic_path:
        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
    if not exists(dynamic_path):
        raise FileNotFoundError(
            f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!")
    # Loading the .so registers the plugin creators with TensorRT.
    ctypes.CDLL(dynamic_path)
    tensorrt.init_libnvinfer_plugins(logger, namespace)
    print(f"Loaded plugin from {dynamic_path}")
def parse_args(argv=None):
    """Parse build_engine CLI options.

    Args:
        argv: optional explicit argument list for testing/embedding;
            defaults to sys.argv[1:] (backward compatible with the
            original zero-argument call).

    Returns:
        argparse.Namespace with model, bsz, precision, imgsz_h, imgsz_w,
        engine and device fields.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="ddrnet23.onnx")
    parser.add_argument("--bsz", type=int, default=4, help="batch size")
    parser.add_argument("--precision", type=str, choices=["float16", "int8"],
                        default="int8", help="The precision of datatype")
    parser.add_argument("--imgsz_h", type=int, default=1024, help="inference size h")
    parser.add_argument("--imgsz_w", type=int, default=2048, help="inference size w")
    # engine args
    parser.add_argument("--engine", type=str, default=None)
    # device
    parser.add_argument(
        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
    )

    return parser.parse_args(argv)
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install xtcocotools tqdm munkres onnxsim opencv-python==4.6.0.66 + +mkdir -p checkpoints +cp /root/data/checkpoints/ddrnet23.onnx checkpoints/ \ No newline at end of file diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/deploy.py b/models/cv/semantic_segmentation/ddrnet/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..6c68506dcc96fd3ef9af0f0417d8909a6ea4d72e --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/deploy.py @@ -0,0 +1,127 @@ +import os +import cv2 +import argparse +import numpy as np +import torch + +from utils import input_transform + +from tensorrt import IxRT +from ixrt.common import RuntimeConfig, RuntimeContext +from tensorrt.deploy.api import * + + +def create_runtime_from_model(args): + model = args.model + quant_file = args.quant_file + precision = args.precision + + config = RuntimeConfig() + config.input_shapes = [("inputx", [args.bsz, 3, args.imgsz_h, args.imgsz_w])] + config.device_idx = args.device + if precision == "int8": + assert os.path.isfile(quant_file), "Quant file must provided for int8 inferencing" + + config.runtime_context = RuntimeContext( + precision, + "nhwc", + use_gpu=True, + pipeline_sync=True, + input_types={"inputx": "float32"}, + output_types={"outputy": "float32"} + ) + runtime = IxRT.from_onnx(model, quant_file, config) + runtime.Init(runtime.config) + return runtime + + +def 
create_runtime_from_engine(engine):
+    runtime = IxRT()
+    runtime.LoadEngine(engine)
+    return runtime
+
+
+def pre_process(img_file):
+    assert os.path.isfile(img_file), f"The input file {img_file} must exist!"
+    img = cv2.imread(img_file, cv2.IMREAD_COLOR)
+    img = input_transform(
+        img,
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]
+    )
+    return img
+
+
+def main(args):
+    print(args)
+    img_file = args.img_file
+    if args.engine is not None:
+        runtime = create_runtime_from_engine(args.engine)
+    else:
+        runtime = create_runtime_from_model(args)
+
+    input_map = runtime.GetInputShape()
+    output_map = runtime.GetOutputShape()
+    print(f"input map is: {input_map}")
+    print(f"output map is: {output_map}")
+
+    input_io_buffers = []
+    output_io_buffers = []
+    for name, shape in input_map.items():
+        # 1. apply memory buffer for input of the shape, based on shape and padding
+        _shape, _padding = shape.dims, shape.padding
+        _shape = [i + j for i, j in zip(_shape, _padding)]
+        _shape = [_shape[0], *_shape[2:4], _shape[1]]
+        # currently we only support float32 as I/O
+        buffer = np.zeros(_shape, dtype=np.float32)
+        # 2. load image to the buffer, TODO batch load
+        img = pre_process(img_file)
+        print("image shape is:", img.shape)
+
+        buffer[0, :, :, :3] = img
+        print(f"Allocated input buffer:{_shape}")
+
+        # 3. put the buffer to a list
+        input_io_buffers.append([name, buffer, shape])
+
+    for name, shape in output_map.items():
+        # 1. apply memory buffer for output of the shape
+        # output_buffer = np.zeros(shape.dims, dtype=np.float32)
+        bs, c, h, w = shape.dims
+        dims = [bs, h, w, c]
+
+        output_buffer = np.zeros(dims, dtype=np.float32)
+        # 2. 
put the buffer to a list + output_io_buffers.append([name, output_buffer, shape]) + + runtime.LoadInput(input_io_buffers) + runtime.Execute() + runtime.FetchOutput(output_io_buffers) + + print(f"Test Achieved!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, default="ddrnet23.onnx") + parser.add_argument("--quant_file", type=str, default=None, help="the json of quantization") + parser.add_argument("--bsz", type=int, default=4, help="batch size") + parser.add_argument("--precision", type=str, choices=["float16", "int8"], default="int8", help="The precision of datatype") + parser.add_argument("--warm_up", type=int, default=5, help="warm_up count") + parser.add_argument("--imgsz_h", type=int, default=1024, help="inference size h") + parser.add_argument("--imgsz_w", type=int, default=2048, help="inference size w") + # engine args + parser.add_argument("--engine", type=str, default=None) + parser.add_argument("--img_file", type=str, default=None) + # device + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" + ) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/inference.py b/models/cv/semantic_segmentation/ddrnet/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..bb178a2bd34dbdb7a2d19c6f56e1e6afb71c8efb --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/inference.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import sys +import time +import random +import ctypes +import numpy as np +from os.path import join, dirname, exists + +from tqdm import tqdm + +from utils import Dataset, get_confusion_matrix +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") + +load_ixrt_plugin() + +def create_engine_context(config): + engine_path = config.engine_file + datatype = tensorrt.DataType.FLOAT + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + with open(engine_path, "rb") as f, tensorrt.Runtime(logger) as runtime: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = 
False
+        if engine.binding_is_input(i):
+            is_input = True
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = context.get_binding_shape(i)
+
+        if is_input:
+            batch_size = shape[0]
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
+
+def check_target(inference, target):
+    satisfied = False
+    if inference > target:
+        satisfied = True
+    return satisfied
+
+
+def test_mIoU_mAcc(dataset, config):
+
+    confusion_matrix = np.zeros((config.num_classes, config.num_classes))
+
+    host_mem = tensorrt.IHostMemory
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    engine, context = create_engine_context(config)
+    inputs, outputs, allocations = setup_io_bindings(engine, context)
+
+    run_times = []
+
+    for i, element in tqdm(enumerate(dataset), desc="Testing mIoU and mAcc"):
+        start_time = time.time()
+        img, label, pad_size, name = element
+        img = np.ascontiguousarray(img.transpose((0,3,1,2)))
+        b, c, h, w = img.shape
+
+        output = np.zeros([b, 32, h, w], outputs[0]["dtype"])
+        err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], img, img.nbytes)
+        assert(err == cuda.CUresult.CUDA_SUCCESS)
+        context.execute_v2(allocations)
+        err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"])
+        assert(err == cuda.CUresult.CUDA_SUCCESS)
+
+        pred = output[:, :config.num_classes, :, :]
+        # flip test
+        if config.flip:
+            flip_img = np.ascontiguousarray(img[:, :, :, ::-1])
+
+            err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], flip_img, flip_img.nbytes)
+            assert(err == cuda.CUresult.CUDA_SUCCESS)
+            context.execute_v2(allocations)
+            
err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + flip_pred = output[:, :config.num_classes, :, :] + + pred += flip_pred + out = np.exp(pred * 0.5) + else: + out = np.exp(pred) + + out = out.transpose((0,2,3,1)) + + for j in range(b): + confusion_matrix += get_confusion_matrix( + label[j:j+1], + out[j:j+1], + pad_size[j], + config.num_classes, + config.ignore_label + ) + + end_time = time.time() + run_times.append(end_time - start_time) + + num_imgs = i * config.bsz + if num_imgs % 100 == 0: + print(f"[INFO] processing: {num_imgs} images") + pos = confusion_matrix.sum(1) + res = confusion_matrix.sum(0) + tp = np.diag(confusion_matrix) + IoU_array = (tp / np.maximum(1.0, pos + res - tp)) + mean_IoU = IoU_array.mean() + print("[INFO] mIoU: %.4f" % (mean_IoU)) + + pos = confusion_matrix.sum(1) + res = confusion_matrix.sum(0) + tp = np.diag(confusion_matrix) + pixel_acc = tp.sum() / pos.sum() + mean_acc = (tp / np.maximum(1.0, pos)).mean() + IoU_array = (tp / np.maximum(1.0, pos + res - tp)) + mean_IoU = IoU_array.mean() + + # Calculate FPS + run_times.remove(max(run_times)) + run_times.remove(min(run_times)) + avg_time = sum(run_times) / len(run_times) + fps = 1. 
/ avg_time + print(f"Executing Done, Time: {avg_time}, FPS: {fps}, mIoU: {mean_IoU}, mAcc: {mean_acc}") + print(f"Class IoU:") + print(f"{IoU_array}") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["mIoU"] = round(mean_IoU, 3) + metricResult["metricResult"]["mAcc"] = round(mean_acc, 3) + print(metricResult) + return mean_IoU, mean_acc + + +def test_fps(config, loop_count, dataset): + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + engine, context = create_engine_context(config) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + run_times = [] + + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + batch_data0 = dataset[0] + for i in range(loop_count): + img, label, pad_size, name = batch_data0 + b, h, w, c = img.shape + output = np.zeros([b, 32, h, w], outputs[0]["dtype"]) + img = np.ascontiguousarray(img.transpose((0,3,1,2))) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], img, img.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + temp_time = end_time - start_time + fps = b / temp_time + print(f"time: {temp_time}, fps: {fps}") + run_times.append(temp_time) + + # Calculate FPS + run_times.remove(max(run_times)) + run_times.remove(min(run_times)) + + avg_time = sum(run_times) / len(run_times) + fps = b / avg_time + print(f"Executing {loop_count} done, Time: {avg_time}, FPS: {fps}") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["FPS"] = round(fps, 3) + print(metricResult) + return fps + + +def main(config): + + num_samples = 1 + bsz = config.bsz + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = (num_samples + bsz - 1) // bsz 
+ + dataset = Dataset( + root=config.dataset_dir, + list_path=config.list_path, + batch_size=config.bsz, + ignore_label=255 + ) + + if config.test_mode == "MIOU": + mIoU, mAcc = test_mIoU_mAcc(dataset, config) + status_mIoU_mAcc = check_target(mIoU, config.target_mIoU) and check_target(mAcc, config.target_mAcc) + sys.exit(int(not (status_mIoU_mAcc))) + + elif config.test_mode == "FPS": + # Warm up + fps = test_fps(config, config.loop_count, dataset) + status_fps = check_target(fps, config.target_fps) + sys.exit(int(not (status_fps))) + + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_type", + type=str, + default="DDRNET", + help="The semantic segmentation(ddrnet)", + ) + parser.add_argument("--engine_file", type=str, help="engine file path") + parser.add_argument("--test_mode", type=str, default="MIOU", help="FPS MIOU") + parser.add_argument( + "--dataset_dir", + type=str, + default="/root/data/datasets", + help="The directory of dataset(cityscapes)", + ) + parser.add_argument( + "--list_path", + type=str, + default="/root/data/datasets/cityscapes/val.lst", + help="The val name list of dataset(cityscapes)", + ) + parser.add_argument("--warm_up", type=int, default=5, help="warm_up count") + parser.add_argument("--flip", action='store_true', help="Flip test") + parser.add_argument("--bsz", type=int, default=4, help="batch size") + parser.add_argument("--num_classes", type=int, default=19, help="the category of dataset") + parser.add_argument("--ignore_label", type=int, default=255, help="the category of not used in calculate confusion matrix") + parser.add_argument("--imgsz_h", type=int, default=1024, help="inference size h") + parser.add_argument("--imgsz_w", type=int, default=2048, help="inference size w") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--target_fps", type=float, default=-1.0) + parser.add_argument("--target_mIoU", type=float, default=-1.0) + 
parser.add_argument("--target_mAcc", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=12) + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + + config = parser.parse_args() + return config + + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/quant.py b/models/cv/semantic_segmentation/ddrnet/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..b5347f66fe861ee176434f89d56b31c5db30de7c --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/quant.py @@ -0,0 +1,70 @@ +import os +import cv2 +import random +import argparse +import numpy as np +from random import shuffle +from utils import input_transform +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, default="ddrnet23.onnx") + parser.add_argument("--dataset_dir", type=str, default="/root/data/datasets/cityscapes") + parser.add_argument("--list_path", type=str, default="/root/data/datasets/cityscapes/val.lst", help="The path of val list.") + parser.add_argument("--save_dir", type=str, help="quant file", default=None) + args = parser.parse_args() + return args + + +def getdataloader(datadir, list_path, step=32, batch_size=4): + num = step * batch_size + + img_list = [line.strip().split()[0] for line in open(list_path)] + val_list = [os.path.join(datadir, x) for x in img_list] + random.shuffle(val_list) + pic_list = val_list[:num] + + dataloader = [] + # imgsz = (1024, 2048) + for file_path in pic_list: + img = cv2.imread(file_path, cv2.IMREAD_COLOR) + img = input_transform( + img, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225] + ) + img = img.transpose((2, 0, 1)) + dataloader.append(img) + + calibration_dataset = dataloader + 
calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + + +args = parse_args() +model_name = os.path.basename(args.model) +model_name = model_name.rsplit(".", maxsplit=1)[0] + +out_dir = os.path.dirname(args.model) +dataloader = getdataloader(args.dataset_dir, args.list_path) + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + save_quant_params_path=os.path.join(out_dir, f"quantized_ddrnet23.json"), + observer="percentile", + analyze=True, + quant_format="qdq", + data_preprocess=lambda x: x.to("cuda"), + ) diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_fp16_accuracy.sh b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..e5d96ce25ad3ce55973441f78f755756df621bb6 --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_fp16_accuracy.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +set -e + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +MODEL_NAME="ddrnet" +BSZ=4 +PRECISION="float16" +DEVICE=0 +FORCE_BUILD=0 +TGT_0=-1 +TGT_1=-1 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + -p | --precision) PRECISION=${arguments[index]};; + -d | --device) DEVICE=${arguments[index]};; + --bs) BSZ=${arguments[index]};; + --tgt_iou) TGT_0=${arguments[index]};; + --tgt_acc) TGT_1=${arguments[index]};; + -f | --force) FORCE_BUILD=1;; + esac +done + +CHECKPOINTS_DIR="./checkpoints" +DATASET_DIR="/root/data/datasets" +LIST_PATH="/root/data/datasets/cityscapes/val.lst" +RUN_DIR="${RUN_DIR:-.}" +ORIGINE_MODEL="${CHECKPOINTS_DIR}/ddrnet23.onnx" + +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} +echo; + +function run_cmd() +{ + echo "[CMD]: $@" + eval $@ +} + +step=1 + +# Simplify Model +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + run_cmd python3 ${RUN_DIR}/sim_onnx_model.py \ + --raw_model_path ${ORIGINE_MODEL} \ + --sim_model_path ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +let step++ +echo; + +# Build Engine +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ ${FORCE_BUILD} -eq 1 ] && [ -e ${ENGINE_FILE} ];then + rm ${ENGINE_FILE} +fi +echo "Building engine(${PRECISION})" +if [ -e ${ENGINE_FILE} ];then + echo " "Build Engine Skip, ${ENGINE_FILE} has been existed +else + run_cmd python3 ${RUN_DIR}/build_engine.py \ + --model ${SIM_MODEL} \ + --bsz ${BSZ} \ + --precision ${PRECISION} \ + --engine ${ENGINE_FILE} \ + --device ${DEVICE} + echo " "Generate Engine ${ENGINE_FILE} +fi +let step++ +echo; + +# Inference +echo [STEP ${step}] : Inference +run_cmd 
python3 ${RUN_DIR}/inference.py \ + --model_type "DDRNET23" \ + --engine_file ${ENGINE_FILE} \ + --test_mode MIOU \ + --dataset_dir ${DATASET_DIR} \ + --list_path ${LIST_PATH} \ + --flip \ + --bsz ${BSZ} \ + --target_mIoU ${TGT_0} \ + --target_mAcc ${TGT_1} \ + --loop_count -1 \ + --device ${DEVICE}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_fp16_performance.sh b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..42c4b2c751dc7d7f8cc4743e0fb3374c46b7f2c6 --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_fp16_performance.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +set -e + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +MODEL_NAME="ddrnet" +BSZ=4 +PRECISION="float16" +DEVICE=0 +FORCE_BUILD=0 +TGT=1 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + -p | --precision) PRECISION=${arguments[index]};; + -d | --device) DEVICE=${arguments[index]};; + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + -f | --force) FORCE_BUILD=1;; + esac +done + +CHECKPOINTS_DIR="./checkpoints" +DATASET_DIR="/root/data/datasets" +LIST_PATH="/root/data/datasets/cityscapes/val.lst" +RUN_DIR="${RUN_DIR:-.}" +ORIGINE_MODEL="${CHECKPOINTS_DIR}/ddrnet23.onnx" + +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} +echo; + +function run_cmd() +{ + echo "[CMD]: $@" + eval $@ +} + +step=1 + +# Simplify Model +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + run_cmd python3 ${RUN_DIR}/sim_onnx_model.py \ + --raw_model_path ${ORIGINE_MODEL} \ + --sim_model_path ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +let step++ +echo; + +# Build Engine +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ ${FORCE_BUILD} -eq 1 ] && [ -e ${ENGINE_FILE} ];then + rm ${ENGINE_FILE} +fi +echo "Building engine(${PRECISION})" +if [ -e ${ENGINE_FILE} ];then + echo " "Build Engine Skip, ${ENGINE_FILE} has been existed +else + run_cmd python3 ${RUN_DIR}/build_engine.py \ + --model ${SIM_MODEL} \ + --bsz ${BSZ} \ + --precision ${PRECISION} \ + --engine ${ENGINE_FILE} \ + --device ${DEVICE} + echo " "Generate Engine ${ENGINE_FILE} +fi +let step++ +echo; + +# Inference +echo [STEP ${step}] : Inference +run_cmd python3 ${RUN_DIR}/inference.py \ + --model_type "DDRNET23" \ 
+ --engine_file ${ENGINE_FILE} \ + --test_mode FPS \ + --dataset_dir ${DATASET_DIR} \ + --list_path ${LIST_PATH} \ + --target_fps ${TGT} \ + --loop_count 12 \ + --device ${DEVICE} + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_int8_accuracy.sh b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..b3e59bb86165629138bd4d6243ac9aa17d30df5f --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_int8_accuracy.sh @@ -0,0 +1,138 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +set -e + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +MODEL_NAME="ddrnet" +BSZ=4 +PRECISION="int8" +DEVICE=0 +FORCE_BUILD=0 +TGT_0=-1 +TGT_1=-1 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + -p | --precision) PRECISION=${arguments[index]};; + -d | --device) DEVICE=${arguments[index]};; + --bs) BSZ=${arguments[index]};; + --tgt_iou) TGT_0=${arguments[index]};; + --tgt_acc) TGT_1=${arguments[index]};; + -f | --force) FORCE_BUILD=1;; + esac +done + +CHECKPOINTS_DIR="./checkpoints" +DATASET_DIR="/root/data/datasets" +LIST_PATH="/root/data/datasets/cityscapes/val.lst" +RUN_DIR="${RUN_DIR:-.}" +ORIGINE_MODEL="${CHECKPOINTS_DIR}/ddrnet23.onnx" + +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} +echo; + +function run_cmd() +{ + echo "[CMD]: $@" + eval $@ +} + +step=1 + +# Simplify Model +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + run_cmd python3 ${RUN_DIR}/sim_onnx_model.py \ + --raw_model_path ${ORIGINE_MODEL} \ + --sim_model_path ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +let step++ +echo; + +# Quant Model +if [ $PRECISION == "int8" ];then + echo [STEP ${step}] : Quant Model + QUANT_MODEL=${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}_sim.onnx + if [ -f ${QUANT_MODEL} ];then + echo " "Quant Model Skip, ${QUANT_MODEL} has been existed + else + run_cmd python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --dataset_dir ${DATASET_DIR}/cityscapes \ + --save_dir ${CHECKPOINTS_DIR} + echo " "Generate ${QUANT_MODEL} + fi + SIM_MODEL=${QUANT_MODEL} + let step++ + echo; +fi + +# Build Engine +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ ${FORCE_BUILD} -eq 1 
] && [ -e ${ENGINE_FILE} ];then + rm ${ENGINE_FILE} +fi +echo "Building engine(${PRECISION})" +if [ -e ${ENGINE_FILE} ];then + echo " "Build Engine Skip, ${ENGINE_FILE} has been existed +else + run_cmd python3 ${RUN_DIR}/build_engine.py \ + --model ${SIM_MODEL} \ + --bsz ${BSZ} \ + --precision ${PRECISION} \ + --engine ${ENGINE_FILE} \ + --device ${DEVICE} + echo " "Generate Engine ${ENGINE_FILE} +fi +let step++ +echo; + +# Inference +echo [STEP ${step}] : Inference +run_cmd python3 ${RUN_DIR}/inference.py \ + --model_type "DDRNET23" \ + --engine_file ${ENGINE_FILE} \ + --test_mode MIOU \ + --dataset_dir ${DATASET_DIR} \ + --list_path ${LIST_PATH} \ + --flip \ + --bsz ${BSZ} \ + --target_mIoU ${TGT_0} \ + --target_mAcc ${TGT_1} \ + --loop_count -1 \ + --device ${DEVICE}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_int8_performance.sh b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..36fecd59d4b94dca203d5b006811fbd24603ffa0 --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/scripts/infer_ddrnet_int8_performance.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +set -e + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +MODEL_NAME="ddrnet" +BSZ=4 +PRECISION="int8" +DEVICE=0 +FORCE_BUILD=0 +TGT=1 + +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + -p | --precision) PRECISION=${arguments[index]};; + -d | --device) DEVICE=${arguments[index]};; + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + -f | --force) FORCE_BUILD=1;; + esac +done + +CHECKPOINTS_DIR="./checkpoints" +DATASET_DIR="/root/data/datasets" +LIST_PATH="/root/data/datasets/cityscapes/val.lst" +RUN_DIR="${RUN_DIR:-.}" +ORIGINE_MODEL="${CHECKPOINTS_DIR}/ddrnet23.onnx" + +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} +echo; + +function run_cmd() +{ + echo "[CMD]: $@" + eval $@ +} + +step=1 + +# Simplify Model +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + run_cmd python3 ${RUN_DIR}/sim_onnx_model.py \ + --raw_model_path ${ORIGINE_MODEL} \ + --sim_model_path ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +let step++ +echo; + +# Quant Model +if [ $PRECISION == "int8" ];then + echo [STEP ${step}] : Quant Model + QUANT_MODEL=${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}_sim.onnx + if [ -f ${QUANT_MODEL} ];then + echo " "Quant Model Skip, ${QUANT_MODEL} has been existed + else + run_cmd python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --dataset_dir ${DATASET_DIR}/cityscapes \ + --save_dir ${CHECKPOINTS_DIR} + echo " "Generate ${QUANT_MODEL} + fi + SIM_MODEL=${QUANT_MODEL} + let step++ + echo; +fi + +# Build Engine +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ ${FORCE_BUILD} -eq 1 ] && [ -e ${ENGINE_FILE} ];then + rm ${ENGINE_FILE} +fi 
+echo "Building engine(${PRECISION})" +if [ -e ${ENGINE_FILE} ];then + echo " "Build Engine Skip, ${ENGINE_FILE} has been existed +else + run_cmd python3 ${RUN_DIR}/build_engine.py \ + --model ${SIM_MODEL} \ + --bsz ${BSZ} \ + --precision ${PRECISION} \ + --engine ${ENGINE_FILE} \ + --device ${DEVICE} + echo " "Generate Engine ${ENGINE_FILE} +fi +let step++ +echo; + +# Inference +echo [STEP ${step}] : Inference +run_cmd python3 ${RUN_DIR}/inference.py \ + --model_type "DDRNET23" \ + --engine_file ${ENGINE_FILE} \ + --test_mode FPS \ + --dataset_dir ${DATASET_DIR} \ + --list_path ${LIST_PATH} \ + --target_fps ${TGT} \ + --loop_count 12 \ + --device ${DEVICE} + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/sim_onnx_model.py b/models/cv/semantic_segmentation/ddrnet/ixrt/sim_onnx_model.py new file mode 100644 index 0000000000000000000000000000000000000000..98aa36c21df3b4ab7194236db9b5006a84ad1dcb --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/sim_onnx_model.py @@ -0,0 +1,17 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--raw_model_path", type=str) + parser.add_argument("--sim_model_path", type=str) + args = parser.parse_args() + return args + + +args = parse_args() +onnx_model = onnx.load(args.raw_model_path) +model_simp, check = simplify(onnx_model) +onnx.save(model_simp, args.sim_model_path) +print('Simplify onnx Done.') diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/utils/__init__.py b/models/cv/semantic_segmentation/ddrnet/ixrt/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6f0a54fef9f0be1c2930e394c114fc24d32c9be7 --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/utils/__init__.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# coding=utf-8 + +import numpy as np +from .dataset import Dataset +from .metrics import get_confusion_matrix + 
+ +def input_transform(image, mean, std): + image = image.astype(np.float32)[:, :, ::-1] + image = image / 255.0 + image -= mean + image /= std + return image diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/utils/dataset.py b/models/cv/semantic_segmentation/ddrnet/ixrt/utils/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2d137c4bfdaa3c37ead38a047f7ad814401e6a4b --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/utils/dataset.py @@ -0,0 +1,139 @@ +import os +import cv2 +import numpy as np +from math import ceil +from tqdm import tqdm + + +class Dataset: + def __init__(self, + root, + list_path, + batch_size=4, + num_classes=19, + ignore_label=255, + base_size=2048, + crop_size=(512, 1024), + downsample_rate=1, + scale_factor=16, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + + self.root = root + self.list_path = list_path + self.batch_size = batch_size + self.num_classes = num_classes + self.mean = mean + self.std = std + self.downsample_rate = downsample_rate + + self.img_list = [line.strip().split() for line in open(list_path)] + self.files = self.read_files() + self.num_batches = ceil(len(self.files) / self.batch_size) + + self.label_mapping = {-1: ignore_label, 0: ignore_label, + 1: ignore_label, 2: ignore_label, + 3: ignore_label, 4: ignore_label, + 5: ignore_label, 6: ignore_label, + 7: 0, 8: 1, 9: ignore_label, + 10: ignore_label, 11: 2, 12: 3, + 13: 4, 14: ignore_label, 15: ignore_label, + 16: ignore_label, 17: 5, 18: ignore_label, + 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, + 25: 12, 26: 13, 27: 14, 28: 15, + 29: ignore_label, 30: ignore_label, + 31: 16, 32: 17, 33: 18} + + self.batch_images, self.batch_labels, self.batch_sizes, self.batch_names = self.batching() + + def read_files(self): + files = [] + for i, item in enumerate(self.img_list): + image_path, label_path = item + name = os.path.splitext(os.path.basename(label_path))[0] + files.append({ + "img": image_path, + "label": 
label_path, + "name": name, + "weight": 1 + }) + # if i == 4: + # break + return files + + def input_transform(self, image): + image = image.astype(np.float32)[:, :, ::-1] + image = image / 255.0 + image -= self.mean + image /= self.std + return image + + def label_transform(self, label): + temp = label.copy() + for k, v in self.label_mapping.items(): + label[temp == k] = v + return np.array(label).astype('int32') + + def gen_sample(self, image, label): + + image = self.input_transform(image) + label = self.label_transform(label) + + if self.downsample_rate != 1: + label = cv2.resize( + label, + None, + fx=self.downsample_rate, + fy=self.downsample_rate, + interpolation=cv2.INTER_NEAREST + ) + return image, label + + def _preprocess(self, index): + item = self.files[index] + name = item["name"] + image = cv2.imread(os.path.join(self.root,'cityscapes',item["img"]), + cv2.IMREAD_COLOR) + size = image.shape + label = cv2.imread(os.path.join(self.root,'cityscapes',item["label"]), + cv2.IMREAD_GRAYSCALE) + image, label = self.gen_sample(image, label) + return image.copy(), label.copy(), np.array(size), name + + def __len__(self): + return self.num_batches + + def __getitem__(self, index): + return (self.batch_images[index], self.batch_labels[index], self.batch_sizes[index], self.batch_names[index]) + + def batching(self): + all_images = [] + all_labels = [] + all_sizes = [] + all_names = [] + + num_batches = self.num_batches + batch_size = self.batch_size + for i in tqdm(range(len(self.files)), desc="Loading Cityscapes Dataset"): + image, label, size, name = self._preprocess(i) + all_images.append(image) + all_labels.append(label) + all_sizes.append(size) + all_names.append(name) + + batch_images = [] + batch_labels = [] + batch_sizes = [] + batch_names = [] + + for j in range(num_batches): + start = j * batch_size + if j == num_batches - 1: + end = None + else: + end = (j + 1) * batch_size + batch_images.append(np.stack(all_images[start:end])) + 
batch_labels.append(np.stack(all_labels[start:end])) + batch_sizes.append(np.stack(all_sizes[start:end])) + batch_names.append(all_names[start:end]) + return (batch_images, batch_labels, batch_sizes, batch_names) diff --git a/models/cv/semantic_segmentation/ddrnet/ixrt/utils/metrics.py b/models/cv/semantic_segmentation/ddrnet/ixrt/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..f0bdfcdf2093b0c2a542dd3ad0f161b8c4ee4ff4 --- /dev/null +++ b/models/cv/semantic_segmentation/ddrnet/ixrt/utils/metrics.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# coding=utf-8 + +""" +Define function to build confusion_matrix. +""" + +import numpy as np + + +def get_confusion_matrix(label, pred, size, num_class=19, ignore=-1): + """ + Calcute the confusion matrix by given label and pred + """ + output = pred + seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8) + + seg_gt = np.asarray(label[:, :size[-3], :size[-2]], dtype=np.int32) + + ignore_index = seg_gt != ignore + seg_gt = seg_gt[ignore_index] + seg_pred = seg_pred[ignore_index] + + index = (seg_gt * num_class + seg_pred).astype('int32') + label_count = np.bincount(index) + confusion_matrix = np.zeros((num_class, num_class)) + + for i_label in range(num_class): + for i_pred in range(num_class): + cur_index = i_label * num_class + i_pred + if cur_index < len(label_count): + confusion_matrix[i_label, + i_pred] = label_count[cur_index] + return confusion_matrix + + +def get_confusion_matrix_batch(label, pred, size, num_class=19, ignore=-1): + """ + Calcute the confusion matrix by given label and pred in one batch. 
+ Arguments: + label: (batch_size, h, w) + pred: (batch_size, h, w, c) + size: (batch_size, 2) + """ + batch_size, h, w, c = pred.shape + confusion_matrix = np.zeros((num_class, num_class)) + for i in range(batch_size): + confusion_matrix += get_confusion_matrix( + label[i], + pred[i:i+1], + size[i], + 19, + 255 + ) + return confusion_matrix diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/README.md b/models/speech/speech_recognition/deepspeech2/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a903795df8bc04020eacb146358f6e45e5df5bb6 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/README.md @@ -0,0 +1,74 @@ +# DeepSpeech2 (ixRT) + +## Model Description + +DeepSpeech2 is an end-to-end speech recognition model based on RNNs and CTC decoding, developed by Baidu. It uses CNN for acoustic feature extraction followed by RNN encoders and CTC decoder. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: LibriSpeech + +### Install Dependencies + +Contact the Iluvatar administrator to get the missing packages: +- paddlepaddle-*.whl + +```bash +pip3 install librosa psutil pysoundfile pytest requests tensorboardX editdistance textgrid onnxsim paddlespeech_ctcdecoders paddleaudio paddlespeech +pip3 install numpy==1.23.5 +``` + +### Model Conversion + +```bash +mkdir checkpoints +cd checkpoints +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/deepspeech2.onnx +wget http://files.deepspark.org.cn:880/deepspark/data/checkpoints/common_crawl_00.prune01111.trie.klm + + +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + +OPTIMIER_FILE=iluvatar-corex-ixrt/tools/optimizer/optimizer.py +echo "Build engine!" 
+python3 modify_model_to_dynamic.py --static_onnx checkpoints/deepspeech2.onnx --dynamic_onnx checkpoints/deepspeech2_dynamic.onnx +python3 ${OPTIMIER_FILE} --onnx checkpoints/deepspeech2_dynamic.onnx --model_type rnn --not_sim +python3 build_engine.py \ + --model_name deepspeech2 \ + --onnx_path checkpoints/deepspeech2_dynamic_end.onnx \ + --engine_path checkpoints/deepspeech2.engine + +``` + +## Model Inference + +```bash +export DATASETS_DIR=/path/to/LibriSpeech/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Test ACC (WER) +bash scripts/infer_deepspeech2_fp16_accuracy.sh +# Test FPS +bash scripts/infer_deepspeech2_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | ThroughPut | WER(%) | +| ------------ | --------- | --------- | ------- | ------ | +| DeepSpeech2 | 1 | FP16 | 1584.153 | 5.8 | diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/build_engine.py b/models/speech/speech_recognition/deepspeech2/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e1ff2a4c1747e83eff4479095e18c179be6870f0 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/build_engine.py @@ -0,0 +1,82 @@ +import os +import json +import onnx +import logging +import argparse +import ctypes +import tensorrt +from tensorrt import Dims +from load_ixrt_plugin import load_ixrt_plugin + +load_ixrt_plugin() + +def parse_args(): + parser = argparse.ArgumentParser(description="Build tensorrt engine of deepspeech2") + parser.add_argument("--model_name", type=str, required=True, help="model name deepspeech2") + parser.add_argument("--onnx_path", type=str, required=True, help="The onnx path") + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--input_size", type=tuple, default=(-1, 161), help="inference size") + parser.add_argument("--engine_path", type=str, required=True, help="engine path to save") + parser.add_argument( 
"--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4") + + args = parser.parse_args() + return args + + +def build_engine_trtapi_dynamicshape(args): + onnx_model = args.onnx_path + assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + + profile.set_shape( + "input", Dims([1, 100, 161]), Dims([1, 1193, 161]), Dims([1, 3494, 161]) + ) + + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + + parser.parse_from_file(onnx_model) + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + # set dynamic + input_tensor = network.get_input(0) + input_tensor.shape = Dims([1, -1, 161]) + + plan = builder.build_serialized_network(network, build_config) + with open(args.engine_path, "wb") as f: + f.write(plan) + + print("Build dynamic shape engine done!") + + +def build_engine_trtapi_staticshape(args): + onnx_model = args.onnx_path + assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!" 
+ IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + + parser.parse_from_file(onnx_model) + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + plan = builder.build_serialized_network(network, build_config) + with open(args.engine_path, "wb") as f: + f.write(plan) + + print("Build static shape engine done!") + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_dynamicshape(args) + # build_engine_trtapi_staticshape(args) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/ci/prepare.sh b/models/speech/speech_recognition/deepspeech2/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..e6a6de447a700f473e393ba0fb88ed371814d2a8 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +pip3 install librosa psutil pysoundfile pytest requests tensorboardX editdistance textgrid onnxsim paddlespeech_ctcdecoders paddleaudio paddlespeech +pip3 install numpy==1.23.5 + +mkdir -p checkpoints +cp /root/data/checkpoints/deepspeech2.onnx checkpoints/ +cp /root/data/checkpoints/common_crawl_00.prune01111.trie.klm checkpoints/ + + +OPTIMIER_FILE=/root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py +echo "Build engine!" +python3 modify_model_to_dynamic.py --static_onnx checkpoints/deepspeech2.onnx --dynamic_onnx checkpoints/deepspeech2_dynamic.onnx +python3 ${OPTIMIER_FILE} --onnx checkpoints/deepspeech2_dynamic.onnx --model_type rnn --not_sim +python3 build_engine.py \ + --model_name deepspeech2 \ + --onnx_path checkpoints/deepspeech2_dynamic_end.onnx \ + --engine_path checkpoints/deepspeech2.engine diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/convert_weights.py b/models/speech/speech_recognition/deepspeech2/ixrt/convert_weights.py new file mode 100644 index 0000000000000000000000000000000000000000..16131d6a9189e2fef131ae124b250fca71e36144 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/convert_weights.py @@ -0,0 +1,141 @@ +import os +import onnx +import argparse +import numpy as np +from onnx import TensorProto, numpy_helper, helper + +def parse_args(): + parser = argparse.ArgumentParser(description="Convert the weight of lstm in model.") + parser.add_argument("--input_onnx", type=str, default="/home/yanlong.hao/DeepSpeech2/ixrt-modelzoo/data/checkpoints/deepspeech2/deepspeech2_part.onnx") + parser.add_argument("--output_onnx", type=str, default="/home/yanlong.hao/DeepSpeech2/ixrt-modelzoo/data/checkpoints/deepspeech2/deepspeech2.onnx") + + args = parser.parse_args() + return args + + +def convert_weights(args): + onnx_model = onnx.load(args.input_onnx) + graph = onnx_model.graph + node = graph.node + initializer = graph.initializer + + for i in range(len(node)): + if node[i].op_type == 
"LSTM": + count = 0 + for t in node[i].input: + if not t: + count += 1 + print("count: ", count) + for _ in range(count): + node[i].input.remove("") + + hidden_size = 0 + for j in range(len(node[i].attribute)): + if node[i].attribute[j].name == "hidden_size": + hidden_size = node[i].attribute[j].i + + w_name = node[i].input[1] + r_name = node[i].input[2] + b_name = node[i].input[3] + + w_data = None + r_data = None + b_data = None + + for data in initializer: + if data.name == node[i].input[1]: + dims = list(data.dims).copy() + dims_A = dims.copy() + w_origin_data = np.frombuffer(data.raw_data, dtype=np.float32) + W_save = np.transpose(w_origin_data.reshape(dims), [0, 2, 1]) + w1 = W_save[0, :, :hidden_size].reshape(-1) + w2 = W_save[0, :, hidden_size : hidden_size * 2].reshape(-1) + w3 = W_save[0, :, hidden_size * 2 : hidden_size * 3].reshape(-1) + w4 = W_save[0, :, hidden_size * 3 : hidden_size * 4].reshape(-1) + + w_r1 = W_save[1, :, :hidden_size].reshape(-1) + w_r2 = W_save[1, :, hidden_size : hidden_size * 2].reshape(-1) + w_r3 = W_save[1, :, hidden_size * 2 : hidden_size * 3].reshape(-1) + w_r4 = W_save[1, :, hidden_size * 3 : hidden_size * 4].reshape(-1) + + w_data = np.concatenate([w1, w2, w3, w4, w_r1, w_r2, w_r3, w_r4]) + print("w_data shape: ", w_data.shape) + + if data.name == node[i].input[2]: + dims = list(data.dims).copy() + dims_B = dims.copy() + r_origin_data = np.frombuffer(data.raw_data, dtype=np.float32) + R_save = np.transpose(r_origin_data.reshape(dims), [0, 2, 1]) + r1 = R_save[0, :, :hidden_size].reshape(-1) + r2 = R_save[0, :, hidden_size : hidden_size * 2].reshape(-1) + r3 = R_save[0, :, hidden_size * 2 : hidden_size * 3].reshape(-1) + r4 = R_save[0, :, hidden_size * 3 : hidden_size * 4].reshape(-1) + + r_r1 = R_save[1, :, :hidden_size].reshape(-1) + r_r2 = R_save[1, :, hidden_size : hidden_size * 2].reshape(-1) + r_r3 = R_save[1, :, hidden_size * 2 : hidden_size * 3].reshape(-1) + r_r4 = R_save[1, :, hidden_size * 3 : hidden_size * 
4].reshape(-1) + + r_data = np.concatenate([r1, r2, r3, r4, r_r1, r_r2, r_r3, r_r4]) + print("r_data shape: ", r_data.shape) + + if data.name == node[i].input[3]: + dims = data.dims + b_origin_data = np.frombuffer(data.raw_data, dtype=np.float32) + B_save = b_origin_data.reshape(dims) + bias_ih = B_save[0, : hidden_size * 4] + bias_hh = B_save[0, hidden_size * 4 : hidden_size * 8] + bias_f = bias_ih + bias_hh # bias add merge + bias_r_ih = B_save[1, : hidden_size * 4] + bias_r_hh = B_save[1, hidden_size * 4 : hidden_size * 8] + bias_r = bias_r_ih + bias_r_hh # bias add merge + b_data = np.concatenate([bias_f, bias_r]) + print("b_data shape: ", b_data.shape) + + for save_data in initializer: + if w_name == save_data.name: + save_data.raw_data=w_data.astype(np.float32).tobytes() + + elif r_name == save_data.name: + save_data.raw_data=r_data.astype(np.float32).tobytes() + + elif b_name == save_data.name: + save_data.raw_data=b_data.astype(np.float32).tobytes() + save_data.dims[1] = int(save_data.dims[1] / 2) + + + for data in initializer: + + if data.name == "p2o.helper.constant.2": + raw_data = np.frombuffer(data.raw_data, dtype=np.int64) + tmp_data = raw_data.copy() + tmp_data[0] = 1 + # tmp_data[0] = 16 + tmp_data[1] = -1 + tmp_data[2] = 1248 + data.raw_data = tmp_data.tobytes() + + lstm_reshape_name = "p2o.helper.constant.4" + # batch size: 1 + lstm_reshape_params = helper.make_tensor(lstm_reshape_name, onnx.TensorProto.INT64, [3], [-1,1,2048]) + # batch size: 16 + # lstm_reshape_params = helper.make_tensor(lstm_reshape_name, onnx.TensorProto.INT64, [3], [-1,16,2048]) + initializer.append(lstm_reshape_params) + + first_reshape_node = True + for i in range(len(node)): + if node[i].op_type == "Reshape": + if first_reshape_node: + first_reshape_node = False + continue + else: + node[i].input[1] = lstm_reshape_name + + onnx.save(onnx_model, args.output_onnx) + + + +if __name__ == "__main__": + args = parse_args() + convert_weights(args) + print("Save Down!") diff 
--git a/models/speech/speech_recognition/deepspeech2/ixrt/cut_onnx_model.py b/models/speech/speech_recognition/deepspeech2/ixrt/cut_onnx_model.py new file mode 100644 index 0000000000000000000000000000000000000000..9f7ad073016994d9da0c676eb156a51605219547 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/cut_onnx_model.py @@ -0,0 +1,12 @@ +import os +import onnx + +base_path = "../../../../../data/checkpoints/deepspeech2" + +raw_path = os.path.join(base_path, "deepspeech2_all.onnx") +save_path = os.path.join(base_path, "deepspeech2_part.onnx") + +input_names = ["input"] +output_names = ["layer_norm_9.tmp_2"] + +onnx.utils.extract_model(raw_path, save_path, input_names, output_names) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/data/decoder.pdparams b/models/speech/speech_recognition/deepspeech2/ixrt/data/decoder.pdparams new file mode 100644 index 0000000000000000000000000000000000000000..c56060aa93728a5d471e95912f491ff0e021dcbe Binary files /dev/null and b/models/speech/speech_recognition/deepspeech2/ixrt/data/decoder.pdparams differ diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/data/demo_002_en.wav b/models/speech/speech_recognition/deepspeech2/ixrt/data/demo_002_en.wav new file mode 100644 index 0000000000000000000000000000000000000000..6dec925262b87ad659421edce892b0ab3b5039c4 Binary files /dev/null and b/models/speech/speech_recognition/deepspeech2/ixrt/data/demo_002_en.wav differ diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/data/mean_std.json b/models/speech/speech_recognition/deepspeech2/ixrt/data/mean_std.json new file mode 100644 index 0000000000000000000000000000000000000000..0867476f50aa0806df9ae6403a8d717c20cbe2ad --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/data/mean_std.json @@ -0,0 +1 @@ +{"mean_stat": [24156894.0, 12346911.0, 22422352.0, 24839050.0, -39564016.0, 26636840.0, 25011566.0, 24835082.0, 29086770.0, -39564016.0, 30332006.0, 27864978.0, 
29913940.0, 30884468.0, 27886950.0, 32087904.0, -39564016.0, 32251384.0, 22983980.0, 31812588.0, 27079002.0, 31846070.0, 27868144.0, 32247188.0, 27818408.0, 32808634.0, 28970758.0, 33002436.0, 30088680.0, 32556118.0, 31661602.0, 31608772.0, 32695986.0, 31184330.0, 32831278.0, 31139706.0, 31246538.0, 32284240.0, 30891642.0, 31133906.0, 31388504.0, 30680138.0, 30491506.0, 31091484.0, 30377426.0, 30421270.0, 30813302.0, 30274476.0, 30598140.0, 30471758.0, 30579814.0, 30329296.0, 30493232.0, 30623062.0, 30493558.0, 30204112.0, 30423514.0, 30253188.0, 30602832.0, 30749188.0, 30818314.0, 30849054.0, 30869560.0, 30913844.0, 30980630.0, 31086216.0, 31331224.0, 31680480.0, 31968040.0, 32049322.0, 32063286.0, 32256122.0, 32434294.0, 32573840.0, 32674094.0, 32730916.0, 32978416.0, 33109402.0, 32979748.0, 33051350.0, 33052606.0, 33441594.0, 33257368.0, 33116758.0, 33340022.0, 33602192.0, 33161626.0, 33500680.0, 33578800.0, 33243446.0, 33923204.0, 33347320.0, 34025008.0, 33657892.0, 34250184.0, 33890276.0, 34463444.0, 34195792.0, 34383156.0, 34520940.0, 34386284.0, 34438136.0, 34548276.0, 34413796.0, 34502292.0, 34560776.0, 34626944.0, 34570476.0, 34526264.0, 34546036.0, 34544248.0, 34544372.0, 34543380.0, 34524356.0, 34496476.0, 34466152.0, 34515864.0, 34529828.0, 34519284.0, 34534880.0, 34563896.0, 34623720.0, 34452040.0, 34501760.0, 34289436.0, 34102164.0, 34146876.0, 33918084.0, 33886240.0, 33774224.0, 33625140.0, 33574368.0, 33387480.0, 33303508.0, 33195294.0, 33030560.0, 32926086.0, 32853062.0, 32793650.0, 32764236.0, 32693808.0, 32635580.0, 32590254.0, 32567152.0, 32705322.0, 32613326.0, 32610814.0, 32676314.0, 32564762.0, 32590186.0, 32465998.0, 32398008.0, 32458644.0, 32346022.0, 32200392.0, 32081072.0, 31974116.0, 31883154.0, 31762528.0, 31613754.0, 31392360.0], "var_stat": [272001248.0, 94793536.0, 243428896.0, 290309088.0, 637701760.0, 336307072.0, 301866720.0, 305879168.0, 399924352.0, 637701760.0, 435734816.0, 376857024.0, 429138816.0, 453332832.0, 381586304.0, 
484897120.0, 637701760.0, 487575968.0, 277610784.0, 473961248.0, 359646656.0, 474523328.0, 377121184.0, 486117824.0, 377040512.0, 502208736.0, 403991680.0, 507742560.0, 432004512.0, 495375488.0, 471045696.0, 469675360.0, 497274720.0, 456176672.0, 499777376.0, 454574560.0, 457307136.0, 483500736.0, 445531520.0, 452447296.0, 457813120.0, 438017440.0, 434063200.0, 448154080.0, 428310304.0, 428972224.0, 439074464.0, 424764320.0, 431150144.0, 427725120.0, 430921920.0, 424025312.0, 426546304.0, 429184832.0, 425736928.0, 419302016.0, 424773664.0, 419875488.0, 427891680.0, 431239648.0, 432819488.0, 433514496.0, 433968800.0, 434929376.0, 436460768.0, 438922048.0, 443221952.0, 449724736.0, 456049792.0, 459802528.0, 463549376.0, 469183936.0, 474003584.0, 477739808.0, 480394752.0, 481895008.0, 488445952.0, 491934816.0, 488405504.0, 490087968.0, 489980800.0, 500284928.0, 495156320.0, 491229760.0, 496754208.0, 503509888.0, 491718784.0, 500877728.0, 503081248.0, 494274336.0, 512897952.0, 497379520.0, 516031520.0, 506352192.0, 523052768.0, 513294720.0, 529277536.0, 521843712.0, 527096864.0, 530821984.0, 526553824.0, 527675936.0, 530968416.0, 527220224.0, 529501184.0, 530768864.0, 532748704.0, 531494624.0, 529949632.0, 530147264.0, 529992288.0, 529933504.0, 529808640.0, 529262784.0, 528516896.0, 527792320.0, 529182784.0, 529615936.0, 529410560.0, 529778816.0, 530344224.0, 531732640.0, 525803968.0, 526245088.0, 522054464.0, 517174528.0, 518361600.0, 511909600.0, 510847392.0, 507652000.0, 503427968.0, 501848896.0, 496581568.0, 493911488.0, 490921440.0, 486737472.0, 483823584.0, 481722496.0, 480019008.0, 478763264.0, 476989056.0, 475359136.0, 473979872.0, 473168672.0, 476439520.0, 474125792.0, 473999968.0, 475610112.0, 472580128.0, 473176800.0, 469737056.0, 467798176.0, 468517056.0, 465801504.0, 462322720.0, 459281952.0, 456547808.0, 454174240.0, 450991168.0, 447175136.0, 441819584.0], "frame_num": 2454662} \ No newline at end of file diff --git 
a/models/speech/speech_recognition/deepspeech2/ixrt/data/preprocess.yaml b/models/speech/speech_recognition/deepspeech2/ixrt/data/preprocess.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f526e0ad34206199ee5593c6ca190409a335593 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/data/preprocess.yaml @@ -0,0 +1,25 @@ +process: + # extract kaldi fbank from PCM + - type: fbank_kaldi + fs: 16000 + n_mels: 161 + n_shift: 160 + win_length: 400 + dither: 0.1 + - type: cmvn_json + cmvn_path: data/mean_std.json + # these three processes are a.k.a. SpecAugument + - type: time_warp + max_time_warp: 5 + inplace: true + mode: PIL + - type: freq_mask + F: 30 + n_mask: 2 + inplace: true + replace_with_zero: false + - type: time_mask + T: 40 + n_mask: 2 + inplace: true + replace_with_zero: false diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/dataset/__init__.py b/models/speech/speech_recognition/deepspeech2/ixrt/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bf3a39f671298828c16a584ca13ba67b3d68f2ae --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/dataset/__init__.py @@ -0,0 +1,2 @@ + +from .librispeech import LibriSpeech diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/dataset/librispeech.py b/models/speech/speech_recognition/deepspeech2/ixrt/dataset/librispeech.py new file mode 100644 index 0000000000000000000000000000000000000000..1b0a88a617369677b40f2d3d2b7926d76803635c --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/dataset/librispeech.py @@ -0,0 +1,70 @@ +""" +Define the dataset of LibriSpeech +""" + +import os +import glob +import soundfile +import numpy as np + + +class LibriSpeech: + def __init__(self, dataroot, transform=None): + assert os.path.exists(dataroot), f"The {dataroot} must be existed!" 
+ self.dataroot = dataroot + self._parse_file() + self.transform = transform + + def _parse_file(self): + """ + Parse the test-clean data and groundtruth + """ + audio_names = [] + text_transcripts = [] + text_pattern = os.path.join(self.dataroot, "test-clean", "*", "*", "*.trans.txt") + text_files = glob.glob(text_pattern) + print(f"[INFO]: text files length: {len(text_files)}") + + for text_file in text_files: + # print(f"Processing: {os.path.basename(text_file)}") + + with open(text_file, 'r') as f: + lines = f.readlines() + + lines = [line.strip().split(' ', maxsplit=1) for line in lines] + for line in lines: + audio_name, text = line + audio_names.append(audio_name) + text_transcripts.append(text) + + self.audio_names = audio_names + self.text_transcripts = text_transcripts + print("[INFO]: Achieve Parsing!") + + def __len__(self): + return len(self.audio_names) + + def __getitem__(self, idx): + + audio_name = self.audio_names[idx] + text_gt = self.text_transcripts[idx] + + # print(f"audio_name: {audio_name}") + # print(f"text_gt: {text_gt}") + + name, subname, _ = audio_name.split('-') + audio_file = os.path.join(self.dataroot, "test-clean", name, subname, audio_name + ".flac") + + audio, sample_rate = soundfile.read(audio_file, dtype="int16", always_2d=True) + audio = audio[:, 0] + # print(f"audio shape: {audio.shape}") + + if self.transform is None: + input_data = audio + else: + preprocess_args = {"train": False} + input_data = self.transform(audio, **preprocess_args) + + input_data = np.expand_dims(input_data.astype(np.float32), axis=0) + return input_data, text_gt + diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/__init__.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1f235fe7d0bc2fb623e23493506ef9896a392fbf --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/__init__.py @@ -0,0 +1,2 @@ + +from .ctc import 
CTCDecoder diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/align.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/align.py new file mode 100644 index 0000000000000000000000000000000000000000..34d796145c65fa430ef0d05251ce7a728d9d8f9b --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/align.py @@ -0,0 +1,162 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import paddle +from paddle import nn +""" + To align the initializer between paddle and torch, + the API below are set defalut initializer with priority higger than global initializer. 
+""" +global_init_type = None + + +class LayerNorm(nn.LayerNorm): + def __init__(self, + normalized_shape, + epsilon=1e-05, + weight_attr=None, + bias_attr=None, + name=None): + if weight_attr is None: + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Constant(1.0)) + if bias_attr is None: + bias_attr = paddle.ParamAttr( + initializer=nn.initializer.Constant(0.0)) + super(LayerNorm, self).__init__(normalized_shape, epsilon, weight_attr, + bias_attr, name) + + +class BatchNorm1D(nn.BatchNorm1D): + def __init__(self, + num_features, + momentum=0.9, + epsilon=1e-05, + weight_attr=None, + bias_attr=None, + data_format='NCL', + name=None): + if weight_attr is None: + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Constant(1.0)) + if bias_attr is None: + bias_attr = paddle.ParamAttr( + initializer=nn.initializer.Constant(0.0)) + super(BatchNorm1D, + self).__init__(num_features, momentum, epsilon, weight_attr, + bias_attr, data_format, name) + + +class Embedding(nn.Embedding): + def __init__(self, + num_embeddings, + embedding_dim, + padding_idx=None, + sparse=False, + weight_attr=None, + name=None): + if weight_attr is None: + weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal()) + super(Embedding, self).__init__(num_embeddings, embedding_dim, + padding_idx, sparse, weight_attr, name) + + +class Linear(nn.Linear): + def __init__(self, + in_features, + out_features, + weight_attr=None, + bias_attr=None, + name=None): + if weight_attr is None: + if global_init_type == "kaiming_uniform": + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.KaimingUniform( + fan_in=None, + negative_slope=math.sqrt(5), + nonlinearity='leaky_relu')) + if bias_attr is None: + if global_init_type == "kaiming_uniform": + bias_attr = paddle.ParamAttr( + initializer=nn.initializer.KaimingUniform( + fan_in=None, + negative_slope=math.sqrt(5), + nonlinearity='leaky_relu')) + super(Linear, self).__init__(in_features, out_features, weight_attr, + 
bias_attr, name) + + +class Conv1D(nn.Conv1D): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + weight_attr=None, + bias_attr=None, + data_format='NCL'): + if weight_attr is None: + if global_init_type == "kaiming_uniform": + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.KaimingUniform( + fan_in=None, + negative_slope=math.sqrt(5), + nonlinearity='leaky_relu')) + if bias_attr is None: + if global_init_type == "kaiming_uniform": + bias_attr = paddle.ParamAttr( + initializer=nn.initializer.KaimingUniform( + fan_in=None, + negative_slope=math.sqrt(5), + nonlinearity='leaky_relu')) + super(Conv1D, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, padding_mode, weight_attr, bias_attr, data_format) + + +class Conv2D(nn.Conv2D): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + weight_attr=None, + bias_attr=None, + data_format='NCHW'): + if weight_attr is None: + if global_init_type == "kaiming_uniform": + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.KaimingUniform( + fan_in=None, + negative_slope=math.sqrt(5), + nonlinearity='leaky_relu')) + if bias_attr is None: + if global_init_type == "kaiming_uniform": + bias_attr = paddle.ParamAttr( + initializer=nn.initializer.KaimingUniform( + fan_in=None, + negative_slope=math.sqrt(5), + nonlinearity='leaky_relu')) + super(Conv2D, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, padding_mode, weight_attr, bias_attr, data_format) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctc.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..1069bec8ef2ac30b7ee6da831cbb7a4a79673551 --- /dev/null +++ 
b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctc.py @@ -0,0 +1,443 @@ +import sys +from typing import Union + +import paddle +from paddle import nn +from paddle.nn import functional as F + +from .align import Linear +from .loss import CTCLoss + +from . import ctc_utils +from .ctcdecoder import ctc_beam_search_decoding_batch # noqa: F401 +from .ctcdecoder import ctc_greedy_decoding # noqa: F401 +from .ctcdecoder import Scorer # noqa: F401 +from .ctcdecoder import CTCBeamSearchDecoder # noqa: F401 + + +__all__ = ['CTCDecoder'] + + +class CTCDecoderBase(nn.Layer): + def __init__(self, + odim, + enc_n_units, + blank_id=0, + dropout_rate: float=0.0, + reduction: Union[str, bool]=True, + batch_average: bool=True, + grad_norm_type: Union[str, None]=None): + """CTC decoder + + Args: + odim ([int]): text vocabulary size + enc_n_units ([int]): encoder output dimention + dropout_rate (float): dropout rate (0.0 ~ 1.0) + reduction (bool): reduce the CTC loss into a scalar, True for 'sum' or 'none' + batch_average (bool): do batch dim wise average. + grad_norm_type (str): Default, None. one of 'instance', 'batch', 'frame', None. + """ + super().__init__() + + self.blank_id = blank_id + self.odim = odim + self.dropout = nn.Dropout(dropout_rate) + self.ctc_lo = Linear(enc_n_units, self.odim) + if isinstance(reduction, bool): + reduction_type = "sum" if reduction else "none" + else: + reduction_type = reduction + self.criterion = CTCLoss( + blank=self.blank_id, + reduction=reduction_type, + batch_average=batch_average, + grad_norm_type=grad_norm_type) + + def forward(self, hs_pad, hlens, ys_pad, ys_lens): + """Calculate CTC loss. + + Args: + hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D) + hlens (Tensor): batch of lengths of hidden state sequences (B) + ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax) + ys_lens (Tensor): batch of lengths of character sequence (B) + Returns: + loss (Tensor): ctc loss value, scalar. 
+ """ + logits = self.ctc_lo(self.dropout(hs_pad)) + loss = self.criterion(logits, ys_pad, hlens, ys_lens) + return loss + + def softmax(self, eouts: paddle.Tensor, temperature: float=1.0): + """Get CTC probabilities. + Args: + eouts (FloatTensor): `[B, T, enc_units]` + Returns: + probs (FloatTensor): `[B, T, odim]` + """ + self.probs = F.softmax(self.ctc_lo(eouts) / temperature, axis=2) + return self.probs + + def log_softmax(self, hs_pad: paddle.Tensor, + temperature: float=1.0) -> paddle.Tensor: + """log_softmax of frame activations + Args: + Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + Returns: + paddle.Tensor: log softmax applied 3d tensor (B, Tmax, odim) + """ + return F.log_softmax(self.ctc_lo(hs_pad) / temperature, axis=2) + + def argmax(self, hs_pad: paddle.Tensor) -> paddle.Tensor: + """argmax of frame activations + Args: + paddle.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + Returns: + paddle.Tensor: argmax applied 2d tensor (B, Tmax) + """ + return paddle.argmax(self.ctc_lo(hs_pad), dim=2) + + def forced_align(self, + ctc_probs: paddle.Tensor, + y: paddle.Tensor, + blank_id=0) -> list: + """ctc forced alignment. + Args: + ctc_probs (paddle.Tensor): hidden state sequence, 2d tensor (T, D) + y (paddle.Tensor): label id sequence tensor, 1d tensor (L) + blank_id (int): blank symbol index + Returns: + paddle.Tensor: best alignment result, (T). + """ + return ctc_utils.forced_align(ctc_probs, y, blank_id) + + +class CTCDecoder(CTCDecoderBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # CTCDecoder LM Score handle + self._ext_scorer = None + self.beam_search_decoder = None + + def _decode_batch_greedy_offline(self, probs_split, vocab_list): + """This function will be deprecated in future. + Decode by best path for a batch of probs matrix input. + :param probs_split: List of 2-D probability matrix, and each consists + of prob vectors for one speech utterancce. 
+ :param probs_split: List of matrix + :param vocab_list: List of tokens in the vocabulary, for decoding. + :type vocab_list: list + :return: List of transcription texts. + :rtype: List of str + """ + results = [] + for i, probs in enumerate(probs_split): + output_transcription = ctc_greedy_decoding( + probs_seq=probs, vocabulary=vocab_list, blank_id=self.blank_id) + results.append(output_transcription) + return results + + def _init_ext_scorer(self, beam_alpha, beam_beta, language_model_path, + vocab_list): + """Initialize the external scorer. + :param beam_alpha: Parameter associated with language model. + :type beam_alpha: float + :param beam_beta: Parameter associated with word count. + :type beam_beta: float + :param language_model_path: Filepath for language model. If it is + empty, the external scorer will be set to + None, and the decoding method will be pure + beam search without scorer. + :type language_model_path: str|None + :param vocab_list: List of tokens in the vocabulary, for decoding. + :type vocab_list: list + """ + # init once + if self._ext_scorer is not None: + return + + if language_model_path != '': + print("begin to initialize the external scorer " + "for decoding") + self._ext_scorer = Scorer(beam_alpha, beam_beta, + language_model_path, vocab_list) + lm_char_based = self._ext_scorer.is_character_based() + lm_max_order = self._ext_scorer.get_max_order() + lm_dict_size = self._ext_scorer.get_dict_size() + print("language model: " + "is_character_based = %d," % lm_char_based + + " max_order = %d," % lm_max_order + " dict_size = %d" % + lm_dict_size) + print("end initializing scorer") + else: + self._ext_scorer = None + print("no language model provided, " + "decoding by pure beam search without scorer.") + + def _decode_batch_beam_search_offline( + self, probs_split, beam_alpha, beam_beta, beam_size, cutoff_prob, + cutoff_top_n, vocab_list, num_processes): + """ + This function will be deprecated in future. 
+ Decode by beam search for a batch of probs matrix input. + :param probs_split: List of 2-D probability matrix, and each consists + of prob vectors for one speech utterancce. + :param probs_split: List of matrix + :param beam_alpha: Parameter associated with language model. + :type beam_alpha: float + :param beam_beta: Parameter associated with word count. + :type beam_beta: float + :param beam_size: Width for Beam search. + :type beam_size: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int + :param vocab_list: List of tokens in the vocabulary, for decoding. + :type vocab_list: list + :param num_processes: Number of processes (CPU) for decoder. + :type num_processes: int + :return: List of transcription texts. + :rtype: List of str + """ + if self._ext_scorer is not None: + self._ext_scorer.reset_params(beam_alpha, beam_beta) + + # beam search decode + num_processes = min(num_processes, len(probs_split)) + beam_search_results = ctc_beam_search_decoding_batch( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=beam_size, + num_processes=num_processes, + ext_scoring_func=self._ext_scorer, + cutoff_prob=cutoff_prob, + cutoff_top_n=cutoff_top_n, + blank_id=self.blank_id) + + results = [result[0][1] for result in beam_search_results] + return results + + def init_decoder(self, batch_size, vocab_list, decoding_method, + lang_model_path, beam_alpha, beam_beta, beam_size, + cutoff_prob, cutoff_top_n, num_processes): + """ + init ctc decoders + Args: + batch_size(int): Batch size for input data + vocab_list (list): List of tokens in the vocabulary, for decoding + decoding_method (str): ctc_beam_search + lang_model_path (str): language model path + beam_alpha (float): beam_alpha + beam_beta (float): beam_beta + 
beam_size (int): beam_size + cutoff_prob (float): cutoff probability in beam search + cutoff_top_n (int): cutoff_top_n + num_processes (int): num_processes + + Raises: + ValueError: when decoding_method not support. + + Returns: + CTCBeamSearchDecoder + """ + self.batch_size = batch_size + self.vocab_list = vocab_list + self.decoding_method = decoding_method + self.beam_size = beam_size + self.cutoff_prob = cutoff_prob + self.cutoff_top_n = cutoff_top_n + self.num_processes = num_processes + if decoding_method == "ctc_beam_search": + self._init_ext_scorer(beam_alpha, beam_beta, lang_model_path, + vocab_list) + if self.beam_search_decoder is None: + self.beam_search_decoder = self.get_decoder( + vocab_list, batch_size, beam_alpha, beam_beta, beam_size, + num_processes, cutoff_prob, cutoff_top_n) + return self.beam_search_decoder + elif decoding_method == "ctc_greedy": + self._init_ext_scorer(beam_alpha, beam_beta, lang_model_path, + vocab_list) + else: + raise ValueError(f"Not support: {decoding_method}") + + def decode_probs_offline(self, probs, logits_lens, vocab_list, + decoding_method, lang_model_path, beam_alpha, + beam_beta, beam_size, cutoff_prob, cutoff_top_n, + num_processes): + """ + This function will be deprecated in future. + ctc decoding with probs. + Args: + probs (Tensor): activation after softmax + logits_lens (Tensor): audio output lens + vocab_list (list): List of tokens in the vocabulary, for decoding + decoding_method (str): ctc_beam_search + lang_model_path (str): language model path + beam_alpha (float): beam_alpha + beam_beta (float): beam_beta + beam_size (int): beam_size + cutoff_prob (float): cutoff probability in beam search + cutoff_top_n (int): cutoff_top_n + num_processes (int): num_processes + + Raises: + ValueError: when decoding_method not support. + + Returns: + List[str]: transcripts. 
+ """ + probs_split = [probs[i, :l, :] for i, l in enumerate(logits_lens)] + if decoding_method == "ctc_greedy": + result_transcripts = self._decode_batch_greedy_offline( + probs_split=probs_split, vocab_list=vocab_list) + elif decoding_method == "ctc_beam_search": + result_transcripts = self._decode_batch_beam_search_offline( + probs_split=probs_split, + beam_alpha=beam_alpha, + beam_beta=beam_beta, + beam_size=beam_size, + cutoff_prob=cutoff_prob, + cutoff_top_n=cutoff_top_n, + vocab_list=vocab_list, + num_processes=num_processes) + else: + raise ValueError(f"Not support: {decoding_method}") + return result_transcripts + + def get_decoder(self, vocab_list, batch_size, beam_alpha, beam_beta, + beam_size, num_processes, cutoff_prob, cutoff_top_n): + """ + init get ctc decoder + Args: + vocab_list (list): List of tokens in the vocabulary, for decoding. + batch_size(int): Batch size for input data + beam_alpha (float): beam_alpha + beam_beta (float): beam_beta + beam_size (int): beam_size + num_processes (int): num_processes + cutoff_prob (float): cutoff probability in beam search + cutoff_top_n (int): cutoff_top_n + + Raises: + ValueError: when decoding_method not support. + + Returns: + CTCBeamSearchDecoder + """ + num_processes = min(num_processes, batch_size) + if self._ext_scorer is not None: + self._ext_scorer.reset_params(beam_alpha, beam_beta) + if self.decoding_method == "ctc_beam_search": + beam_search_decoder = CTCBeamSearchDecoder( + vocab_list, batch_size, beam_size, num_processes, cutoff_prob, + cutoff_top_n, self._ext_scorer, self.blank_id) + else: + raise ValueError(f"Not support: {decoding_method}") + return beam_search_decoder + + def next(self, probs, logits_lens): + """ + Input probs into ctc decoder + Args: + probs (list(list(float))): probs for a batch of data + logits_lens (list(int)): logits lens for a batch of data + Raises: + Exception: when the ctc decoder is not initialized + ValueError: when decoding_method not support. 
+ """ + + if self.beam_search_decoder is None: + raise Exception( + "You need to initialize the beam_search_decoder firstly") + beam_search_decoder = self.beam_search_decoder + + has_value = (logits_lens > 0).tolist() + has_value = [ + "true" if has_value[i] is True else "false" + for i in range(len(has_value)) + ] + probs_split = [ + probs[i, :l, :].tolist() if has_value[i] else probs[i].tolist() + for i, l in enumerate(logits_lens) + ] + if self.decoding_method == "ctc_beam_search": + beam_search_decoder.next(probs_split, has_value) + else: + raise ValueError(f"Not support: {decoding_method}") + + return + + def decode(self): + """ + Get the decoding result + Raises: + Exception: when the ctc decoder is not initialized + ValueError: when decoding_method not support. + Returns: + results_best (list(str)): The best result for a batch of data + results_beam (list(list(str))): The beam search result for a batch of data + """ + if self.beam_search_decoder is None: + raise Exception( + "You need to initialize the beam_search_decoder firstly") + + beam_search_decoder = self.beam_search_decoder + if self.decoding_method == "ctc_beam_search": + batch_beam_results = beam_search_decoder.decode() + batch_beam_results = [[(res[0], res[1]) for res in beam_results] + for beam_results in batch_beam_results] + results_best = [result[0][1] for result in batch_beam_results] + results_beam = [[trans[1] for trans in result] + for result in batch_beam_results] + + else: + raise ValueError(f"Not support: {decoding_method}") + + return results_best, results_beam + + def reset_decoder(self, + batch_size=-1, + beam_size=-1, + num_processes=-1, + cutoff_prob=-1.0, + cutoff_top_n=-1): + if batch_size > 0: + self.batch_size = batch_size + if beam_size > 0: + self.beam_size = beam_size + if num_processes > 0: + self.num_processes = num_processes + if cutoff_prob > 0: + self.cutoff_prob = cutoff_prob + if cutoff_top_n > 0: + self.cutoff_top_n = cutoff_top_n + """ + Reset the decoder state + 
Args: + batch_size(int): Batch size for input data + beam_size (int): beam_size + num_processes (int): num_processes + cutoff_prob (float): cutoff probability in beam search + cutoff_top_n (int): cutoff_top_n + Raises: + Exception: when the ctc decoder is not initialized + """ + if self.beam_search_decoder is None: + raise Exception( + "You need to initialize the beam_search_decoder firstly") + self.beam_search_decoder.reset_state( + self.batch_size, self.beam_size, self.num_processes, + self.cutoff_prob, self.cutoff_top_n) + + def del_decoder(self): + """ + Delete the decoder + """ + if self.beam_search_decoder is not None: + del self.beam_search_decoder + self.beam_search_decoder = None diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctc_utils.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctc_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..df60028cd1fd77f6c534a78bf4844c3a49a668ce --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctc_utils.py @@ -0,0 +1,195 @@ +from pathlib import Path +from typing import List + +import numpy as np +import paddle + +from .utils import text_grid +from .utils import utility + + +__all__ = ["forced_align", "remove_duplicates_and_blank", "insert_blank"] + + +def remove_duplicates_and_blank(hyp: List[int], blank_id=0) -> List[int]: + """ctc alignment to ctc label ids. + + "abaa-acee-" -> "abaace" + + Args: + hyp (List[int]): hypotheses ids, (L) + blank_id (int, optional): blank id. Defaults to 0. + + Returns: + List[int]: remove dupicate ids, then remove blank id. 
+ """ + new_hyp: List[int] = [] + cur = 0 + while cur < len(hyp): + # add non-blank into new_hyp + if hyp[cur] != blank_id: + new_hyp.append(hyp[cur]) + # skip repeat label + prev = cur + while cur < len(hyp) and hyp[cur] == hyp[prev]: + cur += 1 + return new_hyp + + +def insert_blank(label: np.ndarray, blank_id: int=0) -> np.ndarray: + """Insert blank token between every two label token. + + "abcdefg" -> "-a-b-c-d-e-f-g-" + + Args: + label ([np.ndarray]): label ids, List[int], (L). + blank_id (int, optional): blank id. Defaults to 0. + + Returns: + [np.ndarray]: (2L+1). + """ + label = np.expand_dims(label, 1) #[L, 1] + blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id + label = np.concatenate([blanks, label], axis=1) #[L, 2] + label = label.reshape(-1) #[2L], -l-l-l + label = np.append(label, label[0]) #[2L + 1], -l-l-l- + return label + + +def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor, + blank_id=0) -> List[int]: + """ctc forced alignment. + + https://distill.pub/2017/ctc/ + + Args: + ctc_probs (paddle.Tensor): hidden state sequence, 2d tensor (T, D) + y (paddle.Tensor): label id sequence tensor, 1d tensor (L) + blank_id (int): blank symbol index + Returns: + List[int]: best alignment result, (T). 
+ """ + y_insert_blank = insert_blank(y, blank_id) #(2L+1) + + log_alpha = paddle.zeros( + (ctc_probs.shape[0], len(y_insert_blank))) #(T, 2L+1) + log_alpha = log_alpha - float('inf') # log of zero + + # TODO(Hui Zhang): zeros not support paddle.int16 + # self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16 + state_path = (paddle.zeros( + (ctc_probs.shape[0], len(y_insert_blank)), dtype=paddle.int32) - 1 + ) # state path, Tuple((T, 2L+1)) + + # init start state + # TODO(Hui Zhang): VarBase.__getitem__() not support np.int64 + log_alpha[0, 0] = ctc_probs[0][int(y_insert_blank[0])] # State-b, Sb + log_alpha[0, 1] = ctc_probs[0][int(y_insert_blank[1])] # State-nb, Snb + + for t in range(1, ctc_probs.shape[0]): # T + for s in range(len(y_insert_blank)): # 2L+1 + if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ + s] == y_insert_blank[s - 2]: + candidates = paddle.to_tensor( + [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) + prev_state = [s, s - 1] + else: + candidates = paddle.to_tensor([ + log_alpha[t - 1, s], + log_alpha[t - 1, s - 1], + log_alpha[t - 1, s - 2], + ]) + prev_state = [s, s - 1, s - 2] + # TODO(Hui Zhang): VarBase.__getitem__() not support np.int64 + log_alpha[t, s] = paddle.max(candidates) + ctc_probs[t][int( + y_insert_blank[s])] + state_path[t, s] = prev_state[paddle.argmax(candidates)] + # TODO(Hui Zhang): zeros not support paddle.int16 + # self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16 + state_seq = -1 * paddle.ones((ctc_probs.shape[0], 1), dtype=paddle.int32) + + candidates = paddle.to_tensor([ + log_alpha[-1, len(y_insert_blank) - 1], # Sb + log_alpha[-1, len(y_insert_blank) - 2] # Snb + ]) + prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] + state_seq[-1] = prev_state[paddle.argmax(candidates)] + for t in range(ctc_probs.shape[0] - 2, -1, -1): + state_seq[t] = 
state_path[t + 1, state_seq[t + 1, 0]] + + output_alignment = [] + for t in range(0, ctc_probs.shape[0]): + output_alignment.append(y_insert_blank[state_seq[t, 0]]) + + return output_alignment + + +def ctc_align(config, model, dataloader, batch_size, stride_ms, token_dict, + result_file): + """ctc alignment. + + Args: + config (cfgNode): config + model (nn.Layer): U2 Model. + dataloader (io.DataLoader): dataloader. + batch_size (int): decoding batchsize. + stride_ms (int): audio feature stride in ms unit. + token_dict (List[str]): vocab list, e.g. ['blank', 'unk', 'a', 'b', '']. + result_file (str): alignment output file, e.g. /path/to/xxx.align. + """ + if batch_size > 1: + print('alignment mode must be running with batch_size == 1') + sys.exit(1) + assert result_file and result_file.endswith('.align') + + model.eval() + # conv subsampling rate + subsample = utility.get_subsample(config) + print(f"Align Total Examples: {len(dataloader.dataset)}") + + with open(result_file, 'w') as fout: + # one example in batch + for i, batch in enumerate(dataloader): + key, feat, feats_length, target, target_length = batch + + # 1. Encoder + encoder_out, encoder_mask = model._forward_encoder( + feat, feats_length) # (B, maxlen, encoder_dim) + maxlen = encoder_out.shape[1] + ctc_probs = model.ctc.log_softmax( + encoder_out) # (1, maxlen, vocab_size) + + # 2. alignment + ctc_probs = ctc_probs.squeeze(0) + target = target.squeeze(0) + alignment = forced_align(ctc_probs, target) + + print(f"align ids: {key[0]} {alignment}") + fout.write('{} {}\n'.format(key[0], alignment)) + + # 3. 
gen praat + # segment alignment + align_segs = text_grid.segment_alignment(alignment) + print(f"align tokens: {key[0]}, {align_segs}") + + # IntervalTier, List["start end token\n"] + tierformat = text_grid.align_to_tierformat(align_segs, subsample, + token_dict) + + # write tier + align_output_path = Path(result_file).parent / "align" + align_output_path.mkdir(parents=True, exist_ok=True) + tier_path = align_output_path / (key[0] + ".tier") + with tier_path.open('w') as f: + f.writelines(tierformat) + + # write textgrid + textgrid_path = align_output_path / (key[0] + ".TextGrid") + second_per_frame = 1. / (1000. / + stride_ms) # 25ms window, 10ms stride + second_per_example = ( + len(alignment) + 1) * subsample * second_per_frame + text_grid.generate_textgrid( + maxtime=second_per_example, + intervals=tierformat, + output=str(textgrid_path)) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/__init__.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37ceae6e5f8c3016713c4417ea167dec9e3fdc42 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from .swig_wrapper import ctc_beam_search_decoding +from .swig_wrapper import ctc_beam_search_decoding_batch +from .swig_wrapper import ctc_greedy_decoding +from .swig_wrapper import CTCBeamSearchDecoder +from .swig_wrapper import Scorer diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/decoders_deprecated.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/decoders_deprecated.py new file mode 100644 index 0000000000000000000000000000000000000000..0c391ead1ffd3e42ee71586ff1ea9cdd1b1d5285 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/decoders_deprecated.py @@ -0,0 +1,235 @@ +"""Contains various CTC decoders.""" +import multiprocessing +from itertools import groupby +from math import log + +import numpy as np + + +def ctc_greedy_decoder(probs_seq, vocabulary): + """CTC greedy (best path) decoder. + + Path consisting of the most probable tokens are further post-processed to + remove consecutive repetitions and all blanks. + + :param probs_seq: 2-D list of probabilities over the vocabulary for each + character. Each element is a list of float probabilities + for one character. + :type probs_seq: list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :return: Decoding result string. 
+ :rtype: baseline + """ + # dimension verification + for probs in probs_seq: + if not len(probs) == len(vocabulary) + 1: + raise ValueError("probs_seq dimension mismatchedd with vocabulary") + # argmax to get the best index for each time step + max_index_list = list(np.array(probs_seq).argmax(axis=1)) + # remove consecutive duplicate indexes + index_list = [index_group[0] for index_group in groupby(max_index_list)] + # remove blank indexes + blank_index = len(vocabulary) + index_list = [index for index in index_list if index != blank_index] + # convert index list to string + return ''.join([vocabulary[index] for index in index_list]) + + +def ctc_beam_search_decoder(probs_seq, + beam_size, + vocabulary, + cutoff_prob=1.0, + cutoff_top_n=40, + ext_scoring_func=None, + nproc=False): + """CTC Beam search decoder. + + It utilizes beam search to approximately select top best decoding + labels and returning results in the descending order. + The implementation is based on Prefix Beam Search + (https://arxiv.org/abs/1408.2873), and the unclear part is + redesigned. Two important modifications: 1) in the iterative computation + of probabilities, the assignment operation is changed to accumulation for + one prefix may comes from different paths; 2) the if condition "if l^+ not + in A_prev then" after probabilities' computation is deprecated for it is + hard to understand and seems unnecessary. + + :param probs_seq: 2-D list of probability distributions over each time + step, with each element being a list of normalized + probabilities over vocabulary and blank. + :type probs_seq: 2-D list + :param beam_size: Width for beam search. + :type beam_size: int + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param ext_scoring_func: External scoring function for + partially decoded sentence, e.g. word count + or language model. 
+ :type external_scoring_func: callable + :param nproc: Whether the decoder used in multiprocesses. + :type nproc: bool + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. + :rtype: list + """ + # dimension check + for prob_list in probs_seq: + if not len(prob_list) == len(vocabulary) + 1: + raise ValueError("The shape of prob_seq does not match with the " + "shape of the vocabulary.") + + # blank_id assign + blank_id = len(vocabulary) + + # If the decoder called in the multiprocesses, then use the global scorer + # instantiated in ctc_beam_search_decoder_batch(). + if nproc is True: + global ext_nproc_scorer + ext_scoring_func = ext_nproc_scorer + + # initialize + # prefix_set_prev: the set containing selected prefixes + # probs_b_prev: prefixes' probability ending with blank in previous step + # probs_nb_prev: prefixes' probability ending with non-blank in previous step + prefix_set_prev = {'\t': 1.0} + probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0} + + # extend prefix in loop + for time_step in range(len(probs_seq)): + # prefix_set_next: the set containing candidate prefixes + # probs_b_cur: prefixes' probability ending with blank in current step + # probs_nb_cur: prefixes' probability ending with non-blank in current step + prefix_set_next, probs_b_cur, probs_nb_cur = {}, {}, {} + + prob_idx = list(enumerate(probs_seq[time_step])) + cutoff_len = len(prob_idx) + # If pruning is enabled + if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len: + prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True) + cutoff_len, cum_prob = 0, 0.0 + for i in range(len(prob_idx)): + cum_prob += prob_idx[i][1] + cutoff_len += 1 + if cum_prob >= cutoff_prob: + break + cutoff_len = min(cutoff_len, cutoff_top_n) + prob_idx = prob_idx[0:cutoff_len] + + for l in prefix_set_prev: + if l not in prefix_set_next: + probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0 + + # extend prefix by travering prob_idx + for 
index in range(cutoff_len): + c, prob_c = prob_idx[index][0], prob_idx[index][1] + + if c == blank_id: + probs_b_cur[l] += prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]) + else: + last_char = l[-1] + new_char = vocabulary[c] + l_plus = l + new_char + if l_plus not in prefix_set_next: + probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 + + if new_char == last_char: + probs_nb_cur[l_plus] += prob_c * probs_b_prev[l] + probs_nb_cur[l] += prob_c * probs_nb_prev[l] + elif new_char == ' ': + if (ext_scoring_func is None) or (len(l) == 1): + score = 1.0 + else: + prefix = l[1:] + score = ext_scoring_func(prefix) + probs_nb_cur[l_plus] += score * prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]) + else: + probs_nb_cur[l_plus] += prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]) + # add l_plus into prefix_set_next + prefix_set_next[l_plus] = probs_nb_cur[ + l_plus] + probs_b_cur[l_plus] + # add l into prefix_set_next + prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l] + # update probs + probs_b_prev, probs_nb_prev = probs_b_cur, probs_nb_cur + + # store top beam_size prefixes + prefix_set_prev = sorted( + prefix_set_next.items(), key=lambda asd: asd[1], reverse=True) + if beam_size < len(prefix_set_prev): + prefix_set_prev = prefix_set_prev[:beam_size] + prefix_set_prev = dict(prefix_set_prev) + + beam_result = [] + for seq, prob in prefix_set_prev.items(): + if prob > 0.0 and len(seq) > 1: + result = seq[1:] + # score last word by external scorer + if (ext_scoring_func is not None) and (result[-1] != ' '): + prob = prob * ext_scoring_func(result) + log_prob = log(prob) + beam_result.append((log_prob, result)) + else: + beam_result.append((float('-inf'), '')) + + # output top beam_size decoding results + beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) + return beam_result + + +def ctc_beam_search_decoder_batch(probs_split, + beam_size, + vocabulary, + num_processes, + cutoff_prob=1.0, + cutoff_top_n=40, + ext_scoring_func=None): + """CTC beam 
search decoder using multiple processes. + + :param probs_seq: 3-D list with each element as an instance of 2-D list + of probabilities used by ctc_beam_search_decoder(). + :type probs_seq: 3-D list + :param beam_size: Width for beam search. + :type beam_size: int + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param num_processes: Number of parallel processes. + :type num_processes: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param num_processes: Number of parallel processes. + :type num_processes: int + :param ext_scoring_func: External scoring function for + partially decoded sentence, e.g. word count + or language model. + :type external_scoring_function: callable + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. + :rtype: list + """ + if not num_processes > 0: + raise ValueError("Number of processes must be positive!") + + # use global variable to pass the externnal scorer to beam search decoder + global ext_nproc_scorer + ext_nproc_scorer = ext_scoring_func + nproc = True + + pool = multiprocessing.Pool(processes=num_processes) + results = [] + for i, probs_list in enumerate(probs_split): + args = (probs_list, beam_size, vocabulary, cutoff_prob, cutoff_top_n, + None, nproc) + results.append(pool.apply_async(ctc_beam_search_decoder, args)) + + pool.close() + pool.join() + beam_search_results = [result.get() for result in results] + return beam_search_results diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/scorer_deprecated.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/scorer_deprecated.py new file mode 100644 index 0000000000000000000000000000000000000000..362098fe65ec34106926e1804dfbb5abb273d97d --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/scorer_deprecated.py @@ -0,0 +1,78 @@ +# Copyright (c) 2021 
PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""External Scorer for Beam Search Decoder.""" +import os + +import kenlm +import numpy as np + + +class Scorer(object): + """External scorer to evaluate a prefix or whole sentence in + beam search decoding, including the score from n-gram language + model and word count. + + :param alpha: Parameter associated with language model. Don't use + language model when alpha = 0. + :type alpha: float + :param beta: Parameter associated with word count. Don't use word + count when beta = 0. + :type beta: float + :model_path: Path to load language model. 
+ :type model_path: str + """ + + def __init__(self, alpha, beta, model_path): + self._alpha = alpha + self._beta = beta + if not os.path.isfile(model_path): + raise IOError("Invaid language model path: %s" % model_path) + self._language_model = kenlm.LanguageModel(model_path) + + # n-gram language model scoring + def _language_model_score(self, sentence): + #log10 prob of last word + log_cond_prob = list( + self._language_model.full_scores(sentence, eos=False))[-1][0] + return np.power(10, log_cond_prob) + + # word insertion term + def _word_count(self, sentence): + words = sentence.strip().split(' ') + return len(words) + + # reset alpha and beta + def reset_params(self, alpha, beta): + self._alpha = alpha + self._beta = beta + + # execute evaluation + def __call__(self, sentence, log=False): + """Evaluation function, gathering all the different scores + and return the final one. + + :param sentence: The input sentence for evaluation + :type sentence: str + :param log: Whether return the score in log representation. + :type log: bool + :return: Evaluation score, in the decimal or log. + :rtype: float + """ + lm = self._language_model_score(sentence) + word_cnt = self._word_count(sentence) + if log is False: + score = np.power(lm, self._alpha) * np.power(word_cnt, self._beta) + else: + score = self._alpha * np.log(lm) + self._beta * np.log(word_cnt) + return score diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/swig_wrapper.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/swig_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..e4eb43a50807bb602c7259a96adca47e60ee98ad --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/swig_wrapper.py @@ -0,0 +1,146 @@ +"""Wrapper for various CTC decoders in SWIG.""" +import paddlespeech_ctcdecoders + + +class Scorer(paddlespeech_ctcdecoders.Scorer): + """Wrapper for Scorer. 
+ + :param alpha: Parameter associated with language model. Don't use + language model when alpha = 0. + :type alpha: float + :param beta: Parameter associated with word count. Don't use word + count when beta = 0. + :type beta: float + :model_path: Path to load language model. + :type model_path: str + :param vocabulary: Vocabulary list. + :type vocabulary: list + """ + + def __init__(self, alpha, beta, model_path, vocabulary): + paddlespeech_ctcdecoders.Scorer.__init__(self, alpha, beta, model_path, + vocabulary) + + +def ctc_greedy_decoding(probs_seq, vocabulary, blank_id): + """Wrapper for ctc best path decodeing function in swig. + + :param probs_seq: 2-D list of probability distributions over each time + step, with each element being a list of normalized + probabilities over vocabulary and blank. + :type probs_seq: 2-D list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :return: Decoding result string. + :rtype: str + """ + result = paddlespeech_ctcdecoders.ctc_greedy_decoding(probs_seq.tolist(), + vocabulary, blank_id) + return result + + +def ctc_beam_search_decoding(probs_seq, + vocabulary, + beam_size, + cutoff_prob=1.0, + cutoff_top_n=40, + ext_scoring_func=None, + blank_id=0): + """Wrapper for the CTC Beam Search Decoding function. + + :param probs_seq: 2-D list of probability distributions over each time + step, with each element being a list of normalized + probabilities over vocabulary and blank. + :type probs_seq: 2-D list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param beam_size: Width for beam search. + :type beam_size: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int + :param ext_scoring_func: External scoring function for + partially decoded sentence, e.g. 
word count + or language model. + :type external_scoring_func: callable + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. + :rtype: list + """ + beam_results = paddlespeech_ctcdecoders.ctc_beam_search_decoding( + probs_seq.tolist(), vocabulary, beam_size, cutoff_prob, cutoff_top_n, + ext_scoring_func, blank_id) + beam_results = [(res[0], res[1].decode('utf-8')) for res in beam_results] + return beam_results + + +def ctc_beam_search_decoding_batch(probs_split, + vocabulary, + beam_size, + num_processes, + cutoff_prob=1.0, + cutoff_top_n=40, + ext_scoring_func=None, + blank_id=0): + """Wrapper for the batched CTC beam search decodeing batch function. + + :param probs_seq: 3-D list with each element as an instance of 2-D list + of probabilities used by ctc_beam_search_decoder(). + :type probs_seq: 3-D list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param beam_size: Width for beam search. + :type beam_size: int + :param num_processes: Number of parallel processes. + :type num_processes: int + :param cutoff_prob: Cutoff probability in vocabulary pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int + :param num_processes: Number of parallel processes. + :type num_processes: int + :param ext_scoring_func: External scoring function for + partially decoded sentence, e.g. word count + or language model. + :type external_scoring_function: callable + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. 
+ :rtype: list + """ + probs_split = [probs_seq.tolist() for probs_seq in probs_split] + + batch_beam_results = paddlespeech_ctcdecoders.ctc_beam_search_decoding_batch( + probs_split, vocabulary, beam_size, num_processes, cutoff_prob, + cutoff_top_n, ext_scoring_func, blank_id) + batch_beam_results = [[(res[0], res[1]) for res in beam_results] + for beam_results in batch_beam_results] + return batch_beam_results + + +class CTCBeamSearchDecoder(paddlespeech_ctcdecoders.CtcBeamSearchDecoderBatch): + """Wrapper for CtcBeamSearchDecoderBatch. + Args: + vocab_list (list): Vocabulary list. + beam_size (int): Width for beam search. + num_processes (int): Number of parallel processes. + param cutoff_prob (float): Cutoff probability in vocabulary pruning, + default 1.0, no pruning. + cutoff_top_n (int): Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + param ext_scorer (Scorer): External scorer for partially decoded sentence, e.g. word count + or language model. 
+ """ + + def __init__(self, vocab_list, batch_size, beam_size, num_processes, + cutoff_prob, cutoff_top_n, _ext_scorer, blank_id): + paddlespeech_ctcdecoders.CtcBeamSearchDecoderBatch.__init__( + self, vocab_list, batch_size, beam_size, num_processes, cutoff_prob, + cutoff_top_n, _ext_scorer, blank_id) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/test_decoders.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/test_decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..dc344b763de4b2b4e93e4c452ad40a4e79022f5e --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/ctcdecoder/test_decoders.py @@ -0,0 +1,87 @@ +"""Test decoders.""" +import unittest + +import decoders_deprecated as decoder + + +class TestDecoders(unittest.TestCase): + def setUp(self): + self.vocab_list = ["\'", ' ', 'a', 'b', 'c', 'd'] + self.beam_size = 20 + self.probs_seq1 = [[ + 0.06390443, 0.21124858, 0.27323887, 0.06870235, 0.0361254, + 0.18184413, 0.16493624 + ], [ + 0.03309247, 0.22866108, 0.24390638, 0.09699597, 0.31895462, + 0.0094893, 0.06890021 + ], [ + 0.218104, 0.19992557, 0.18245131, 0.08503348, 0.14903535, + 0.08424043, 0.08120984 + ], [ + 0.12094152, 0.19162472, 0.01473646, 0.28045061, 0.24246305, + 0.05206269, 0.09772094 + ], [ + 0.1333387, 0.00550838, 0.00301669, 0.21745861, 0.20803985, + 0.41317442, 0.01946335 + ], [ + 0.16468227, 0.1980699, 0.1906545, 0.18963251, 0.19860937, + 0.04377724, 0.01457421 + ]] + self.probs_seq2 = [[ + 0.08034842, 0.22671944, 0.05799633, 0.36814645, 0.11307441, + 0.04468023, 0.10903471 + ], [ + 0.09742457, 0.12959763, 0.09435383, 0.21889204, 0.15113123, + 0.10219457, 0.20640612 + ], [ + 0.45033529, 0.09091417, 0.15333208, 0.07939558, 0.08649316, + 0.12298585, 0.01654384 + ], [ + 0.02512238, 0.22079203, 0.19664364, 0.11906379, 0.07816055, + 0.22538587, 0.13483174 + ], [ + 0.17928453, 0.06065261, 0.41153005, 0.1172041, 0.11880313, + 0.07113197, 
0.04139363 + ], [ + 0.15882358, 0.1235788, 0.23376776, 0.20510435, 0.00279306, + 0.05294827, 0.22298418 + ]] + self.greedy_result = ["ac'bdc", "b'da"] + self.beam_search_result = ['acdc', "b'a"] + + def test_greedy_decoder_1(self): + bst_result = decoder.ctc_greedy_decoder(self.probs_seq1, + self.vocab_list) + self.assertEqual(bst_result, self.greedy_result[0]) + + def test_greedy_decoder_2(self): + bst_result = decoder.ctc_greedy_decoder(self.probs_seq2, + self.vocab_list) + self.assertEqual(bst_result, self.greedy_result[1]) + + def test_beam_search_decoder_1(self): + beam_result = decoder.ctc_beam_search_decoder( + probs_seq=self.probs_seq1, + beam_size=self.beam_size, + vocabulary=self.vocab_list) + self.assertEqual(beam_result[0][1], self.beam_search_result[0]) + + def test_beam_search_decoder_2(self): + beam_result = decoder.ctc_beam_search_decoder( + probs_seq=self.probs_seq2, + beam_size=self.beam_size, + vocabulary=self.vocab_list) + self.assertEqual(beam_result[0][1], self.beam_search_result[1]) + + def test_beam_search_decoder_batch(self): + beam_results = decoder.ctc_beam_search_decoder_batch( + probs_split=[self.probs_seq1, self.probs_seq2], + beam_size=self.beam_size, + vocabulary=self.vocab_list, + num_processes=24) + self.assertEqual(beam_results[0][0][1], self.beam_search_result[0]) + self.assertEqual(beam_results[1][0][1], self.beam_search_result[1]) + + +if __name__ == '__main__': + unittest.main() diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/loss.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d082135a422f7edd567c203e8d7f9edfac0231e0 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/loss.py @@ -0,0 +1,166 @@ +import inspect + +import paddle +from paddle import nn +from paddle.nn import functional as F + + +__all__ = ['CTCLoss', "LabelSmoothingLoss"] + + +class CTCLoss(nn.Layer): + def __init__(self, + 
blank=0, + reduction='sum', + batch_average=False, + grad_norm_type=None): + super().__init__() + # last token id as blank id + self.loss = nn.CTCLoss(blank=blank, reduction=reduction) + self.batch_average = batch_average + + print(f"CTCLoss Loss reduction: {reduction}, div-bs: {batch_average}") + print(f"CTCLoss Grad Norm Type: {grad_norm_type}") + + assert grad_norm_type in ('instance', 'batch', 'frame', None) + self.norm_by_times = False + self.norm_by_batchsize = False + self.norm_by_total_logits_len = False + if grad_norm_type is None: + # no grad norm + pass + elif grad_norm_type == 'instance': + self.norm_by_times = True + elif grad_norm_type == 'batch': + self.norm_by_batchsize = True + elif grad_norm_type == 'frame': + self.norm_by_total_logits_len = True + else: + raise ValueError(f"CTCLoss Grad Norm no support {grad_norm_type}") + kwargs = { + "norm_by_times": self.norm_by_times, + "norm_by_batchsize": self.norm_by_batchsize, + "norm_by_total_logits_len": self.norm_by_total_logits_len, + } + + # Derive only the args which the func has + try: + param = inspect.signature(self.loss.forward).parameters + except ValueError: + # Some function, e.g. built-in function, are failed + param = {} + self._kwargs = {k: v for k, v in kwargs.items() if k in param} + _notin = {k: v for k, v in kwargs.items() if k not in param} + print(f"{self.loss} kwargs:{self._kwargs}, not support: {_notin}") + + def forward(self, logits, ys_pad, hlens, ys_lens): + """Compute CTC loss. + + Args: + logits ([paddle.Tensor]): [B, Tmax, D] + ys_pad ([paddle.Tensor]): [B, Tmax] + hlens ([paddle.Tensor]): [B] + ys_lens ([paddle.Tensor]): [B] + + Returns: + [paddle.Tensor]: scalar. If reduction is 'none', then (N), where N = \text{batch size}. 
+ """ + B = logits.shape[0] + # warp-ctc need logits, and do softmax on logits by itself + # warp-ctc need activation with shape [T, B, V + 1] + # logits: (B, L, D) -> (L, B, D) + logits = logits.transpose([1, 0, 2]) + ys_pad = ys_pad.astype(paddle.int32) + loss = self.loss(logits, ys_pad, hlens, ys_lens, **self._kwargs) + if self.batch_average: + # Batch-size average + loss = loss / B + return loss + + +class LabelSmoothingLoss(nn.Layer): + """Label-smoothing loss. + In a standard CE loss, the label's data distribution is: + [0,1,2] -> + [ + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + ] + In the smoothing version CE Loss,some probabilities + are taken from the true label prob (1.0) and are divided + among other labels. + e.g. + smoothing=0.1 + [0,1,2] -> + [ + [0.9, 0.05, 0.05], + [0.05, 0.9, 0.05], + [0.05, 0.05, 0.9], + ] + + """ + + def __init__(self, + size: int, + padding_idx: int, + smoothing: float, + normalize_length: bool=False): + """Label-smoothing loss. + + Args: + size (int): the number of class + padding_idx (int): padding class id which will be ignored for loss + smoothing (float): smoothing rate (0.0 means the conventional CE) + normalize_length (bool): + True, normalize loss by sequence length; + False, normalize loss by batch size. + Defaults to False. + """ + super().__init__() + self.size = size + self.padding_idx = padding_idx + self.smoothing = smoothing + self.confidence = 1.0 - smoothing + self.normalize_length = normalize_length + self.criterion = nn.KLDivLoss(reduction="none") + + def forward(self, x: paddle.Tensor, target: paddle.Tensor) -> paddle.Tensor: + """Compute loss between x and target. + The model outputs and data labels tensors are flatten to + (batch*seqlen, class) shape and a mask is applied to the + padding part which should not be calculated for loss. 
+ + Args: + x (paddle.Tensor): prediction (batch, seqlen, class) + target (paddle.Tensor): + target signal masked with self.padding_id (batch, seqlen) + Returns: + loss (paddle.Tensor) : The KL loss, scalar float value + """ + B, T, D = x.shape + assert D == self.size + x = x.reshape((-1, self.size)) + target = target.reshape([-1]) + + # use zeros_like instead of torch.no_grad() for true_dist, + # since no_grad() can not be exported by JIT + true_dist = paddle.full_like(x, self.smoothing / (self.size - 1)) + ignore = target == self.padding_idx # (B,) + + #TODO(Hui Zhang): target = target * (1 - ignore) # avoid -1 index + target = target.masked_fill(ignore, 0) # avoid -1 index + # true_dist.scatter_(1, target.unsqueeze(1), self.confidence) + target_mask = F.one_hot(target, self.size) + true_dist *= (1 - target_mask) + true_dist += target_mask * self.confidence + + kl = self.criterion(F.log_softmax(x, axis=1), true_dist) + + #TODO(Hui Zhang): sum not support bool type + #total = len(target) - int(ignore.sum()) + total = len(target) - int(ignore.type_as(target).sum()) + denom = total if self.normalize_length else B + #numer = (kl * (1 - ignore)).sum() + numer = kl.masked_fill(ignore.unsqueeze(1), 0).sum() + return numer / denom diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/utils/text_grid.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/utils/text_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..4865249c3a235a5f5fe3b8a73c308c96d2d69415 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/utils/text_grid.py @@ -0,0 +1,114 @@ +from typing import Dict +from typing import List +from typing import Text + +import textgrid + + +def segment_alignment(alignment: List[int], blank_id=0) -> List[List[int]]: + """segment ctc alignment ids by continuous blank and repeat label. + + Args: + alignment (List[int]): ctc alignment id sequence. + e.g. 
[0, 0, 0, 1, 1, 1, 2, 0, 0, 3] + blank_id (int, optional): blank id. Defaults to 0. + + Returns: + List[List[int]]: token align, segment aligment id sequence. + e.g. [[0, 0, 0, 1, 1, 1], [2], [0, 0, 3]] + """ + # convert alignment to a praat format, which is a doing phonetics + # by computer and helps analyzing alignment + align_segs = [] + # get frames level duration for each token + start = 0 + end = 0 + while end < len(alignment): + while end < len(alignment) and alignment[end] == blank_id: # blank + end += 1 + if end == len(alignment): + align_segs[-1].extend(alignment[start:]) + break + end += 1 + while end < len(alignment) and alignment[end - 1] == alignment[ + end]: # repeat label + end += 1 + align_segs.append(alignment[start:end]) + start = end + return align_segs + + +def align_to_tierformat(align_segs: List[List[int]], + subsample: int, + token_dict: Dict[int, Text], + blank_id=0) -> List[Text]: + """Generate textgrid.Interval format from alignment segmentations. + + Args: + align_segs (List[List[int]]): segmented ctc alignment ids. + subsample (int): 25ms frame_length, 10ms hop_length, 1/subsample + token_dict (Dict[int, Text]): int -> str map. + + Returns: + List[Text]: list of textgrid.Interval text, str(start, end, text). 
+ """ + hop_length = 10 # ms + second_ms = 1000 # ms + frame_per_second = second_ms / hop_length # 25ms frame_length, 10ms hop_length + second_per_frame = 1.0 / frame_per_second + + begin = 0 + duration = 0 + tierformat = [] + + for idx, tokens in enumerate(align_segs): + token_len = len(tokens) + token = tokens[-1] + # time duration in second + duration = token_len * subsample * second_per_frame + if idx < len(align_segs) - 1: + print(f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}") + tierformat.append( + f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}\n") + else: + for i in tokens: + if i != blank_id: + token = i + break + print(f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}") + tierformat.append( + f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}\n") + begin = begin + duration + + return tierformat + + +def generate_textgrid(maxtime: float, + intervals: List[Text], + output: Text, + name: Text='ali') -> None: + """Create alignment textgrid file. + + Args: + maxtime (float): audio duartion. + intervals (List[Text]): ctc output alignment. e.g. "start-time end-time word" per item. + output (Text): textgrid filepath. + name (Text, optional): tier or layer name. Defaults to 'ali'. 
+ """ + # Download Praat: https://www.fon.hum.uva.nl/praat/ + avg_interval = maxtime / (len(intervals) + 1) + print(f"average second/token: {avg_interval}") + margin = 0.0001 + + tg = textgrid.TextGrid(maxTime=maxtime) + tier = textgrid.IntervalTier(name=name, maxTime=maxtime) + + i = 0 + for dur in intervals: + s, e, text = dur.split() + tier.add(minTime=float(s) + margin, maxTime=float(e), mark=text) + + tg.append(tier) + + tg.write(output) + print("successfully generator textgrid {}.".format(output)) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/decoder/utils/utility.py b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/utils/utility.py new file mode 100644 index 0000000000000000000000000000000000000000..29e758733fe7de7a394b43f964e6e017a5c57151 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/decoder/utils/utility.py @@ -0,0 +1,79 @@ +"""Contains common utility functions.""" +import math +import os +import random +import sys +from contextlib import contextmanager +from pprint import pformat +from typing import List + +import distutils.util +import numpy as np +import paddle +import soundfile + + +__all__ = ["all_version", "UpdateConfig", "seed_all", "log_add"] + + +def all_version(): + vers = { + "python": sys.version, + "paddle": paddle.__version__, + "paddle_commit": paddle.version.commit, + "soundfile": soundfile.__version__, + } + print(f"Deps Module Version:{pformat(list(vers.items()))}") + + +@contextmanager +def UpdateConfig(config): + """Update yacs config""" + config.defrost() + yield + config.freeze() + + +def seed_all(seed: int=20210329): + """freeze random generator seed.""" + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + +def log_add(args: List[int]) -> float: + """Stable log add + + Args: + args (List[int]): log scores + + Returns: + float: sum of log scores + """ + if all(a == -float('inf') for a in args): + return -float('inf') + a_max = max(args) + lsp = math.log(sum(math.exp(a - 
a_max) for a in args)) + return a_max + lsp + + +def get_subsample(config): + """Subsample rate from config. + + Args: + config (yacs.config.CfgNode): yaml config + + Returns: + int: subsample rate. + """ + if config['encoder'] == 'squeezeformer': + return 4 + else: + input_layer = config["encoder_conf"]["input_layer"] + assert input_layer in ["conv2d", "conv2d6", "conv2d8"] + if input_layer == "conv2d": + return 4 + elif input_layer == "conv2d6": + return 6 + elif input_layer == "conv2d8": + return 8 diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/inference.py b/models/speech/speech_recognition/deepspeech2/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4e4ddbad28b7d0f3297ae230c5f5e9f307e017 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/inference.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import sys +import time +import glob +import json +import random +import argparse +import numpy as np +from tqdm import tqdm + +import torch +import paddle +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart + +from transform import Transformation +from dataset import LibriSpeech +from decoder import CTCDecoder + +from utils import VOCABLIST as vocab_list +from utils.error_rate import wer +from utils import deepspeech2_trtapi_ixrt, setup_io_bindings +from load_ixrt_plugin import load_ixrt_plugin + +load_ixrt_plugin() + +def parse_config(): + parser = argparse.ArgumentParser(description="The DeepSpeech2 network Inference on LibriSpeech dataset.") + parser.add_argument( + "--model_type", + type=str, + default="DeepSpeech2", + help="The speech recognition model(DeepSpeech2)", + ) + parser.add_argument( + "--preprocess_config", + type=str, + default="data/preprocess.yaml", + help="The preprocess input file", + ) + parser.add_argument( + "--engine_file", + type=str, + default="../../../../../data/checkpoints/deepspeech2/deepspeech2.engine", + 
help="engine file path" + ) + parser.add_argument( + "--decoder_file", + type=str, + default="../../../../../data/checkpoints/deepspeech2/decoder.pdparams", + help="ctcdecoder checkpoints file" + ) + parser.add_argument( + "--lang_model_path", + type=str, + default="../../../../../data/checkpoints/deepspeech2/lm/common_crawl_00.prune01111.trie.klm", + help="The language model path" + ) + # dataset + parser.add_argument( + '--dataroot', + default="../../../../../data/datasets/LibriSpeech", + help='location to download dataset(s)' + ) + parser.add_argument("--bsz", type=int, default=1, help="Dynamic input") + parser.add_argument("--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--wer_target", type=float, default=-1.0) + parser.add_argument("--test_num_samples", type=int, default=-1) + + config = parser.parse_args() + return config + + +def test_result(data, engine, context, decoder, test_num_samples): + + input_name = "input" + output_name = "output" + + data_len = len(data) + wer_sum = 0.0 + + if test_num_samples != -1: + data_len = test_num_samples + + for i in tqdm(range(data_len), desc="Testing WER"): + + start_time = time.time() + audio, text = data[i] + audio_shape = audio.shape + # print(f"audio_shape: {audio_shape}") + + # Set the input shape + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(audio_shape)) + + inputs, outputs, allocations = setup_io_bindings(engine, context) + pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], audio, audio.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + if config.use_async: + stream = cuda.Stream() + context.execute_async_v2(allocations, stream.handle) + stream.synchronize() + else: + context.execute_v2(allocations) + + err, = cuda.cuMemcpyDtoH(pred_output, outputs[0]["allocation"], outputs[0]["nbytes"]) + 
assert(err == cuda.CUresult.CUDA_SUCCESS) + + eouts = paddle.to_tensor(pred_output) + eouts_len = paddle.to_tensor([eouts.shape[1]]) + probs = decoder.softmax(eouts) + batch_size = probs.shape[0] + + decoder.init_decoder( + batch_size, + vocab_list, + "ctc_beam_search", + config.lang_model_path, + 1.9, + 0.3, + 500, + 1.0, + 40, + 8 + ) + decoder.reset_decoder(batch_size=batch_size) + decoder.next(probs, eouts_len) + trans_best, trans_beam = decoder.decode() + # print(f"result_transcripts: {trans_best}") + # print(f"text: {text}") + cur_wer = wer(text, trans_best[0], True) + print(f"wer: {cur_wer}") + wer_sum += cur_wer + + wer_avg = wer_sum / data_len + print(f"wer_avg: {wer_avg}") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["wer_avg"] = round(wer_avg, 3) + print(metricResult) + return wer_avg + + +def main(config): + + # Step1:build dataset + preprocessing = Transformation(config.preprocess_config) + dataset = LibriSpeech(config.dataroot, preprocessing) + + # Step2: load engine + engine, context = deepspeech2_trtapi_ixrt(config.engine_file) + + # Step3: load decoder + decoder = CTCDecoder( + odim=31, + enc_n_units=2048, + blank_id=0, + dropout_rate=0.0, + reduction=True, + batch_average=True, + grad_norm_type=None + ) + decoder_state_dict = paddle.load(config.decoder_file) + decoder.set_state_dict(decoder_state_dict) + + # Step4: run test + wer = test_result(dataset, engine, context, decoder, config.test_num_samples) + status = 'Pass' if wer <= config.wer_target else 'Fail' + + print("="*30) + print(f"\nCheck AUC: Test : {wer} Target:{config.wer_target} State : {status}") + print("="*30) + + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/inference_demo.py b/models/speech/speech_recognition/deepspeech2/ixrt/inference_demo.py new file mode 100644 index 0000000000000000000000000000000000000000..3385eadcfeb4466407897a6d4cf94e1258888e49 --- /dev/null +++ 
b/models/speech/speech_recognition/deepspeech2/ixrt/inference_demo.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import sys +import time +import glob +import json +import random +import argparse +import soundfile +import numpy as np +from tqdm import tqdm + +import torch +import paddle +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart + +from transform import Transformation +from decoder import CTCDecoder + +from utils import VOCABLIST as vocab_list +from utils import deepspeech2_trtapi_ixrt, setup_io_bindings + +from load_ixrt_plugin import load_ixrt_plugin + +load_ixrt_plugin() + +def parse_config(): + parser = argparse.ArgumentParser(description="The DeepSpeech2 network Inference demo and performance.") + parser.add_argument( + "--model_type", + type=str, + default="DeepSpeech2", + help="The speech recognition model(DeepSpeech2)", + ) + parser.add_argument( + "--audio_file", + type=str, + default="data/demo_002_en.wav", + help="The input speech wave", + ) + parser.add_argument( + "--preprocess_config", + type=str, + default="data/preprocess.yaml", + help="The preprocess input file", + ) + parser.add_argument( + "--engine_file", + type=str, + default="../../../../../data/checkpoints/deepspeech2/deepspeech2.engine", + help="engine file path" + ) + parser.add_argument( + "--decoder_file", + type=str, + default="../../../../../data/checkpoints/deepspeech2/decoder.pdparams", + help="ctcdecoder checkpoints file" + ) + parser.add_argument( + "--lang_model_path", + type=str, + default="../../../../../data/checkpoints/deepspeech2/lm/common_crawl_00.prune01111.trie.klm", + help="The language model path" + ) + parser.add_argument("--bsz", type=int, default=1, help="Dynamic input") + parser.add_argument("--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--run_loop", type=int, default=-1) + parser.add_argument("--warm_up", type=int, default=-1) + parser.add_argument("--throughput_target", type=float, default=-1.0) + + config = parser.parse_args() + return config + + +def main(config): + # Step1: Load the input wave + assert os.path.isfile(config.audio_file), "The input audio file must be existed!" + audio, sample_rate = soundfile.read(config.audio_file, dtype="int16", always_2d=True) + audio = audio[:, 0] + print(f"audio shape: {audio.shape}") + + # fbank + preprocess_args = {"train": False} + preprocessing = Transformation(config.preprocess_config) + input_data = preprocessing(audio, **preprocess_args) + input_data = np.expand_dims(input_data.astype(np.float32), axis=0) + print(f"feat shape: {input_data.shape}") + + # Step2: Load the engine + engine, context = deepspeech2_trtapi_ixrt(config.engine_file) + + input_shape = input_data.shape + print("input shape: ", input_shape) + + input_idx = engine.get_binding_index("input") + context.set_binding_shape(input_idx, Dims(input_shape)) + + inputs, outputs, allocations = setup_io_bindings(engine, context) + pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], input_data, input_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + print("\n Warm Up Start.") + for i in range(config.warm_up): context.execute_v2(allocations) + print("Warm Up Done.") + + run_times = [] + for i in range(config.run_loop): + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + run_times.append(end_time - start_time) + + run_times.remove(max(run_times)) + run_times.remove(min(run_times)) + + avg_time = sum(run_times) / len(run_times) + throughput = pred_output.shape[1] / avg_time + print(f"Executing {config.run_loop} done, Time: {avg_time}, ThroughPut: {throughput}") + + err, = 
cuda.cuMemcpyDtoH(pred_output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step3: Load the CTCDecoder + decoder = CTCDecoder( + odim=31, + enc_n_units=2048, + blank_id=0, + dropout_rate=0.0, + reduction=True, + batch_average=True, + grad_norm_type=None + ) + decoder_state_dict = paddle.load(config.decoder_file) + decoder.set_state_dict(decoder_state_dict) + + eouts = paddle.to_tensor(pred_output) + eouts_len = paddle.to_tensor([eouts.shape[1]]) + probs = decoder.softmax(eouts) + batch_size = probs.shape[0] + + decoder.init_decoder( + batch_size, + vocab_list, + "ctc_beam_search", + config.lang_model_path, + 1.9, + 0.3, + 500, + 1.0, + 40, + 8 + ) + + decoder.reset_decoder(batch_size=batch_size) + decoder.next(probs, eouts_len) + trans_best, trans_beam = decoder.decode() + print("result_transcripts: ", trans_best) + + status = 'Pass' if throughput >= config.throughput_target else 'Fail' + + print("="*30) + print(f"\nCheck ThroughPut: Test : {throughput} Target:{config.throughput_target} State : {status}") + print("="*30) + + metricResult = {"metricResult": {}} + metricResult["metricResult"]["ThroughPut"] = round(throughput, 3) + print(metricResult) + + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/load_ixrt_plugin.py b/models/speech/speech_recognition/deepspeech2/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..b40f69103ed16c1a2ec127fd5b9344f4b079fdce --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/load_ixrt_plugin.py @@ -0,0 +1,13 @@ +from os.path import join, dirname, exists +import tensorrt as trt +import ctypes + +def load_ixrt_plugin(logger=trt.Logger(trt.Logger.WARNING), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(trt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + 
f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) + trt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/modify_model_to_dynamic.py b/models/speech/speech_recognition/deepspeech2/ixrt/modify_model_to_dynamic.py new file mode 100644 index 0000000000000000000000000000000000000000..b168d3c48100442eadf7054ce85e21163ea56f64 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/modify_model_to_dynamic.py @@ -0,0 +1,28 @@ +import onnx +from onnx import helper +import argparse + +def modify_to_dynamic(args): + model = onnx.load(args.static_onnx) + + graph = model.graph + for input_node in graph.input: + if input_node.name == 'input': + input_shape = input_node.type.tensor_type.shape.dim + input_shape[0].dim_value = 1 + input_shape[1].dim_param = 'None' + input_shape[2].dim_value = 161 + + onnx.save(model, args.dynamic_onnx) + onnx.checker.check_model(model, full_check=True) + +def parse_args(): + parser = argparse.ArgumentParser(description="modify static shape to dynamic for deepspeech2") + parser.add_argument("--static_onnx", type=str, required=True, help="The input static onnx path") + parser.add_argument("--dynamic_onnx", type=str, required=True, help="The output dynamic onnx path") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + modify_to_dynamic(args) \ No newline at end of file diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/scripts/infer_deepspeech2_fp16_accuracy.sh b/models/speech/speech_recognition/deepspeech2/ixrt/scripts/infer_deepspeech2_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..1d10f6f3f4b10729dcf15d85e3bf3d07ec7c475d --- /dev/null +++ 
#!/bin/bash
# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Run the DeepSpeech2 FP16 accuracy evaluation with ixRT.
# Optional flag: --tgt <wer_target> (defaults to -1, i.e. no target check).

DATASETS_DIR="${DATASETS_DIR:-/path/to/LibriSpeech}"
CHECKPOINTS_DIR="${CHECKPOINTS_DIR:-./checkpoints}"
RUN_DIR="${RUN_DIR:-.}"

# Pick up the value following --tgt, if present.
TGT=-1
cli_args=("$@")
for i in "${!cli_args[@]}"
do
    if [ "${cli_args[$i]}" = "--tgt" ]; then
        TGT="${cli_args[$((i + 1))]}"
    fi
done

cd "${RUN_DIR}"
python3 inference.py \
    --model_type "deepspeech2" \
    --engine_file "${CHECKPOINTS_DIR}/deepspeech2.engine" \
    --decoder_file "data/decoder.pdparams" \
    --lang_model_path "${CHECKPOINTS_DIR}/common_crawl_00.prune01111.trie.klm" \
    --dataroot "${DATASETS_DIR}" \
    --wer_target ${TGT} \
    --test_num_samples 500
#!/bin/bash
# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Run the DeepSpeech2 FP16 performance (throughput) benchmark with ixRT.
# Optional flag: --tgt <throughput_target> (defaults to -1, no target check).

DATASETS_DIR="${DATASETS_DIR:-/path/to/LibriSpeech}"
CHECKPOINTS_DIR="${CHECKPOINTS_DIR:-./checkpoints}"
RUN_DIR="${RUN_DIR:-.}"

# Pick up the value following --tgt, if present.
TGT=-1
cli_args=("$@")
for i in "${!cli_args[@]}"
do
    if [ "${cli_args[$i]}" = "--tgt" ]; then
        TGT="${cli_args[$((i + 1))]}"
    fi
done

cd "${RUN_DIR}"
python3 inference_demo.py \
    --model_type "deepspeech2" \
    --engine_file "${CHECKPOINTS_DIR}/deepspeech2.engine" \
    --decoder_file "data/decoder.pdparams" \
    --lang_model_path "${CHECKPOINTS_DIR}/common_crawl_00.prune01111.trie.klm" \
    --run_loop 12 \
    --warm_up 5 \
    --throughput_target ${TGT}
class CMVN():
    """Apply global/speaker CMVN (cepstral mean/variance normalization) or
    its inverse.

    Statistics can be supplied directly as a dict mapping speaker -> stats
    matrix, or loaded from disk as a Kaldi matrix ("mat"), a numpy file
    ("npy"), a Kaldi ark ("ark") or an HDF5 file ("hdf5").

    Kaldi stores CMVN statistics as a (2, feat_dim + 1) matrix: row 0 holds
    the per-dimension feature sums with the frame count in the last column,
    row 1 holds the per-dimension squared sums.

    Args:
        stats: statistics source (dict, or a path interpreted per filetype).
        norm_means (bool): subtract the mean when applying CMVN.
        norm_vars (bool): scale by the inverse std when applying CMVN.
        filetype (str): one of "mat", "npy", "ark", "hdf5".
        utt2spk (str): optional utterance->speaker map file for speaker CMVN.
        spk2utt (str): optional speaker->utterances map file (alternative).
        reverse (bool): apply the inverse transform instead.
        std_floor (float): lower bound on std to avoid division blow-up.
    """

    def __init__(
            self,
            stats,
            norm_means=True,
            norm_vars=False,
            filetype="mat",
            utt2spk=None,
            spk2utt=None,
            reverse=False,
            std_floor=1.0e-20, ):
        self.stats_file = stats
        self.norm_means = norm_means
        self.norm_vars = norm_vars
        self.reverse = reverse
        # FIX: always define this attribute. It used to be created only in
        # the "ark"/"hdf5" branches, so probing it for other filetypes
        # raised AttributeError.
        self.accept_uttid = False

        if isinstance(stats, dict):
            stats_dict = dict(stats)
        else:
            # Use for global CMVN
            if filetype == "mat":
                stats_dict = {None: kaldiio.load_mat(stats)}
            # Use for global CMVN
            elif filetype == "npy":
                stats_dict = {None: np.load(stats)}
            # Use for speaker CMVN
            elif filetype == "ark":
                self.accept_uttid = True
                stats_dict = dict(kaldiio.load_ark(stats))
            # Use for speaker CMVN
            elif filetype == "hdf5":
                self.accept_uttid = True
                # open read-only; the statistics are never written here
                stats_dict = h5py.File(stats, "r")
            else:
                raise ValueError("Not supporting filetype={}".format(filetype))

        if utt2spk is not None:
            self.utt2spk = {}
            with io.open(utt2spk, "r", encoding="utf-8") as f:
                for line in f:
                    utt, spk = line.rstrip().split(None, 1)
                    self.utt2spk[utt] = spk
        elif spk2utt is not None:
            self.utt2spk = {}
            with io.open(spk2utt, "r", encoding="utf-8") as f:
                for line in f:
                    spk, utts = line.rstrip().split(None, 1)
                    for utt in utts.split():
                        self.utt2spk[utt] = spk
        else:
            self.utt2spk = None

        # Convert the raw sums into a per-speaker bias (-mean) and
        # scale (1/std).
        self.bias = {}
        self.scale = {}
        for spk, stats in stats_dict.items():
            assert len(stats) == 2, stats.shape

            count = stats[0, -1]

            # If the count itself is an array, only its first element is used.
            if not (np.isscalar(count) or isinstance(count, (int, float))):
                count = count.flatten()[0]

            mean = stats[0, :-1] / count
            # V(x) = E(x^2) - (E(x))^2
            var = stats[1, :-1] / count - mean * mean
            std = np.maximum(np.sqrt(var), std_floor)
            self.bias[spk] = -mean
            self.scale[spk] = 1 / std

    def __repr__(self):
        return ("{name}(stats_file={stats_file}, "
                "norm_means={norm_means}, norm_vars={norm_vars}, "
                "reverse={reverse})".format(
                    name=self.__class__.__name__,
                    stats_file=self.stats_file,
                    norm_means=self.norm_means,
                    norm_vars=self.norm_vars,
                    reverse=self.reverse, ))

    def __call__(self, x, uttid=None):
        """Normalize (or de-normalize, if reverse=True) features x.

        Args:
            x (np.ndarray): features, shape (time, feat_dim).
            uttid: utterance id, mapped to a speaker when utt2spk is set.
        Returns:
            np.ndarray: transformed features, same shape as x.
        """
        if self.utt2spk is not None:
            spk = self.utt2spk[uttid]
        else:
            spk = uttid

        if not self.reverse:
            # apply cmvn
            if self.norm_means:
                x = np.add(x, self.bias[spk])
            if self.norm_vars:
                x = np.multiply(x, self.scale[spk])
        else:
            # apply reverse cmvn (undo scaling first, then the mean shift)
            if self.norm_vars:
                x = np.divide(x, self.scale[spk])
            if self.norm_means:
                x = np.subtract(x, self.bias[spk])

        return x
+ "Apply Global CMVN" + + def __init__(self, + cmvn_path, + norm_means=True, + norm_vars=True, + std_floor=1.0e-20): + # cmvn_path: Option[str, dict] + cmvn = cmvn_path + self.cmvn = cmvn + self.norm_means = norm_means + self.norm_vars = norm_vars + self.std_floor = std_floor + if isinstance(cmvn, dict): + cmvn_stats = cmvn + else: + with open(cmvn) as f: + cmvn_stats = json.load(f) + self.count = cmvn_stats['frame_num'] + self.mean = np.array(cmvn_stats['mean_stat']) / self.count + self.square_sums = np.array(cmvn_stats['var_stat']) + self.var = self.square_sums / self.count - self.mean**2 + self.std = np.maximum(np.sqrt(self.var), self.std_floor) + + def __repr__(self): + return f"""{self.__class__.__name__}( + cmvn_path={self.cmvn}, + norm_means={self.norm_means}, + norm_vars={self.norm_vars},)""" + + def __call__(self, x, uttid=None): + # x: [Time, Dim] + if self.norm_means: + x = np.subtract(x, self.mean) + + if self.norm_vars: + x = np.divide(x, self.std) + return x diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/transform/functional.py b/models/speech/speech_recognition/deepspeech2/ixrt/transform/functional.py new file mode 100644 index 0000000000000000000000000000000000000000..688a0bede78787479a4d05b75ee80bf34bfb6e7d --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/transform/functional.py @@ -0,0 +1,110 @@ +import inspect + + +def check_kwargs(func, kwargs, name=None): + """check kwargs are valid for func + + If kwargs are invalid, raise TypeError as same as python default + :param function func: function to be validated + :param dict kwargs: keyword arguments for func + :param str name: name used in TypeError (default is func name) + """ + try: + params = inspect.signature(func).parameters + except ValueError: + return + if name is None: + name = func.__name__ + for k in kwargs.keys(): + if k not in params: + raise TypeError( + f"{name}() got an unexpected keyword argument '{k}'") + + +class TransformInterface: + 
"""Transform Interface""" + + def __call__(self, x): + raise NotImplementedError("__call__ method is not implemented") + + @classmethod + def add_arguments(cls, parser): + return parser + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class Identity(TransformInterface): + """Identity Function""" + + def __call__(self, x): + return x + + +class FuncTrans(TransformInterface): + """Functional Transformation + + WARNING: + Builtin or C/C++ functions may not work properly + because this class heavily depends on the `inspect` module. + + Usage: + + >>> def foo_bar(x, a=1, b=2): + ... '''Foo bar + ... :param x: input + ... :param int a: default 1 + ... :param int b: default 2 + ... ''' + ... return x + a - b + + + >>> class FooBar(FuncTrans): + ... _func = foo_bar + ... __doc__ = foo_bar.__doc__ + """ + + _func = None + + def __init__(self, **kwargs): + self.kwargs = kwargs + check_kwargs(self.func, kwargs) + + def __call__(self, x): + return self.func(x, **self.kwargs) + + @classmethod + def add_arguments(cls, parser): + fname = cls._func.__name__.replace("_", "-") + group = parser.add_argument_group(fname + " transformation setting") + for k, v in cls.default_params().items(): + # TODO(karita): get help and choices from docstring? 
+ attr = k.replace("_", "-") + group.add_argument(f"--{fname}-{attr}", default=v, type=type(v)) + return parser + + @property + def func(self): + return type(self)._func + + @classmethod + def default_params(cls): + try: + d = dict(inspect.signature(cls._func).parameters) + except ValueError: + d = dict() + return { + k: v.default + for k, v in d.items() if v.default != inspect.Parameter.empty + } + + def __repr__(self): + params = self.default_params() + params.update(**self.kwargs) + ret = self.__class__.__name__ + "(" + if len(params) == 0: + return ret + ")" + for k, v in params.items(): + ret += "{}={}, ".format(k, v) + return ret[:-2] + ")" diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/transform/spec_augment.py b/models/speech/speech_recognition/deepspeech2/ixrt/transform/spec_augment.py new file mode 100644 index 0000000000000000000000000000000000000000..e83efa12ec5fe38034c1f964042ae5f457af1077 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/transform/spec_augment.py @@ -0,0 +1,193 @@ +"""Spec Augment module for preprocessing i.e., data augmentation""" +import random + +import numpy +from PIL import Image + +from .functional import FuncTrans + + +def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"): + """time warp for spec augment + + move random center frame by the random width ~ uniform(-window, window) + :param numpy.ndarray x: spectrogram (time, freq) + :param int max_time_warp: maximum time frames to warp + :param bool inplace: overwrite x with the result + :param str mode: "PIL" (default, fast, not differentiable) or "sparse_image_warp" + (slow, differentiable) + :returns numpy.ndarray: time warped spectrogram (time, freq) + """ + window = max_time_warp + if window == 0: + return x + + if mode == "PIL": + t = x.shape[0] + if t - window <= window: + return x + # NOTE: randrange(a, b) emits a, a + 1, ..., b - 1 + center = random.randrange(window, t - window) + warped = random.randrange(center - window, 
def freq_mask(x, F=30, n_mask=2, replace_with_zero=True, inplace=False):
    """Frequency masking for SpecAugment.

    :param numpy.ndarray x: spectrogram (time, freq)
    :param int F: upper bound (exclusive) for each random mask draw
    :param int n_mask: the number of masks
    :param bool replace_with_zero: pad zero on mask if true else use mean
    :param bool inplace: overwrite x instead of copying
    :returns numpy.ndarray: masked spectrogram (time, freq)
    """
    cloned = x if inplace else x.copy()

    num_mel_channels = cloned.shape[1]
    # each row: (start-range limiter f, raw mask width mask_end)
    fs = numpy.random.randint(0, F, size=(n_mask, 2))

    for f, mask_end in fs:
        # FIX: skip draws at least as wide as the feature axis; randrange
        # over an empty range would raise ValueError. This mirrors the
        # guard already present in time_mask.
        if num_mel_channels - f <= 0:
            continue
        f_zero = random.randrange(0, num_mel_channels - f)
        mask_end += f_zero

        # zero-width mask: nothing to do
        if f == 0:
            continue

        if replace_with_zero:
            cloned[:, f_zero:mask_end] = 0
        else:
            cloned[:, f_zero:mask_end] = cloned.mean()
    return cloned
def spec_augment(
        x,
        resize_mode="PIL",
        max_time_warp=80,
        max_freq_width=27,
        n_freq_mask=2,
        max_time_width=100,
        n_time_mask=2,
        inplace=True,
        replace_with_zero=True, ):
    """SpecAugment: random time warping followed by time/freq masking.

    The default setting is based on LD (Librispeech double) in Table 2 of
    https://arxiv.org/pdf/1904.08779.pdf

    :param numpy.ndarray x: (time, freq)
    :param str resize_mode: "PIL" (fast, nondifferentiable) or
        "sparse_image_warp" (slow, differentiable)
    :param int max_time_warp: maximum frames to warp the center frame (W)
    :param int max_freq_width: maximum width of the random freq mask (F)
    :param int n_freq_mask: the number of the random freq mask (m_F)
    :param int max_time_width: maximum width of the random time mask (T)
    :param int n_time_mask: the number of the random time mask (m_T)
    :param bool inplace: overwrite intermediate array
    :param bool replace_with_zero: pad zero on mask if true else use mean
    """
    assert isinstance(x, numpy.ndarray)
    assert x.ndim == 2
    warped = time_warp(x, max_time_warp, inplace=inplace, mode=resize_mode)
    masked = freq_mask(
        warped,
        max_freq_width,
        n_freq_mask,
        inplace=inplace,
        replace_with_zero=replace_with_zero, )
    return time_mask(
        masked,
        max_time_width,
        n_time_mask,
        inplace=inplace,
        replace_with_zero=replace_with_zero, )
def stft(x,
         n_fft,
         n_shift,
         win_length=None,
         window="hann",
         center=True,
         pad_mode="reflect"):
    """Short-time Fourier transform of a mono or multi-channel signal.

    librosa.stft itself is mono-only, so each channel is transformed
    separately and the results stacked.

    :param numpy.ndarray x: signal of shape [Time] or [Time, Channel]
    :returns numpy.ndarray: [Time, Freq] for mono input,
        [Time, Channel, Freq] otherwise
    """
    mono = (x.ndim == 1)
    if mono:
        # x: [Time] -> [Time, Channel]
        x = x[:, None]
    x = x.astype(np.float32)

    per_channel = []
    for ch in range(x.shape[1]):
        spec = librosa.stft(
            y=x[:, ch],
            n_fft=n_fft,
            hop_length=n_shift,
            win_length=win_length,
            window=window,
            center=center,
            pad_mode=pad_mode, ).T  # transpose -> [Time, Freq]
        per_channel.append(spec)
    # x: [Time, Channel, Freq]
    out = np.stack(per_channel, axis=1)

    # mono input: drop the channel axis again
    return out[:, 0] if mono else out
class Spectrogram():
    """Callable wrapper around :func:`spectrogram` with fixed STFT settings."""

    def __init__(self, n_fft, n_shift, win_length=None, window="hann"):
        self.n_fft = n_fft
        self.n_shift = n_shift
        self.win_length = win_length
        self.window = window

    def __repr__(self):
        return (f"{self.__class__.__name__}(n_fft={self.n_fft}, "
                f"n_shift={self.n_shift}, win_length={self.win_length}, "
                f"window={self.window})")

    def __call__(self, x):
        # delegate to the module-level helper with the stored settings
        return spectrogram(
            x,
            n_fft=self.n_fft,
            n_shift=self.n_shift,
            win_length=self.win_length,
            window=self.window, )
("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, " + "fmin={fmin}, fmax={fmax}, eps={eps}))".format( + name=self.__class__.__name__, + fs=self.fs, + n_mels=self.n_mels, + n_fft=self.n_fft, + fmin=self.fmin, + fmax=self.fmax, + eps=self.eps, )) + + def __call__(self, x): + return stft2logmelspectrogram( + x, + fs=self.fs, + n_mels=self.n_mels, + n_fft=self.n_fft, + fmin=self.fmin, + fmax=self.fmax, ) + + +class Stft(): + def __init__( + self, + n_fft, + n_shift, + win_length=None, + window="hann", + center=True, + pad_mode="reflect", ): + self.n_fft = n_fft + self.n_shift = n_shift + self.win_length = win_length + self.window = window + self.center = center + self.pad_mode = pad_mode + + def __repr__(self): + return ("{name}(n_fft={n_fft}, n_shift={n_shift}, " + "win_length={win_length}, window={window}," + "center={center}, pad_mode={pad_mode})".format( + name=self.__class__.__name__, + n_fft=self.n_fft, + n_shift=self.n_shift, + win_length=self.win_length, + window=self.window, + center=self.center, + pad_mode=self.pad_mode, )) + + def __call__(self, x): + return stft( + x, + self.n_fft, + self.n_shift, + win_length=self.win_length, + window=self.window, + center=self.center, + pad_mode=self.pad_mode, ) + + +class IStft(): + def __init__(self, n_shift, win_length=None, window="hann", center=True): + self.n_shift = n_shift + self.win_length = win_length + self.window = window + self.center = center + + def __repr__(self): + return ("{name}(n_shift={n_shift}, " + "win_length={win_length}, window={window}," + "center={center})".format( + name=self.__class__.__name__, + n_shift=self.n_shift, + win_length=self.win_length, + window=self.window, + center=self.center, )) + + def __call__(self, x): + return istft( + x, + self.n_shift, + win_length=self.win_length, + window=self.window, + center=self.center, ) + + +class LogMelSpectrogramKaldi(): + def __init__( + self, + fs=16000, + n_mels=80, + n_shift=160, # unit:sample, 10ms + win_length=400, # unit:sample, 25ms + 
energy_floor=0.0, + dither=0.1): + """ + The Kaldi implementation of LogMelSpectrogram + Args: + fs (int): sample rate of the audio + n_mels (int): number of mel filter banks + n_shift (int): number of points in a frame shift + win_length (int): number of points in a frame windows + energy_floor (float): Floor on energy in Spectrogram computation (absolute) + dither (float): Dithering constant + + Returns: + LogMelSpectrogramKaldi + """ + + self.fs = fs + self.n_mels = n_mels + num_point_ms = fs / 1000 + self.n_frame_length = win_length / num_point_ms + self.n_frame_shift = n_shift / num_point_ms + self.energy_floor = energy_floor + self.dither = dither + + def __repr__(self): + return ( + "{name}(fs={fs}, n_mels={n_mels}, " + "n_frame_shift={n_frame_shift}, n_frame_length={n_frame_length}, " + "dither={dither}))".format( + name=self.__class__.__name__, + fs=self.fs, + n_mels=self.n_mels, + n_frame_shift=self.n_frame_shift, + n_frame_length=self.n_frame_length, + dither=self.dither, )) + + def __call__(self, x, train): + """ + Args: + x (np.ndarray): shape (Ti,) + train (bool): True, train mode. 
+ + Raises: + ValueError: not support (Ti, C) + + Returns: + np.ndarray: (T, D) + """ + dither = self.dither if train else 0.0 + if x.ndim != 1: + raise ValueError("Not support x: [Time, Channel]") + + + # torchaudio + """ + waveform = torch.from_numpy(np.expand_dims(x, 0)).type(torch.float32) + mat = kaldi.fbank( + waveform, + num_mel_bins=self.n_mels, + frame_length=self.n_frame_length, + frame_shift=self.n_frame_shift, + dither=dither, + energy_floor=self.energy_floor, + sample_frequency=self.fs) + """ + # paddlespeech + waveform = paddle.to_tensor(np.expand_dims(x, 0), dtype=paddle.float32) + mat = kaldi.fbank( + waveform, + n_mels=self.n_mels, + frame_length=self.n_frame_length, + frame_shift=self.n_frame_shift, + dither=dither, + energy_floor=self.energy_floor, + sr=self.fs) + mat = np.squeeze(mat.numpy()) + return mat + + +class WavProcess(): + def __init__(self): + """ + Args: + dither (float): Dithering constant + + Returns: + """ + + def __call__(self, x): + """ + Args: + x (np.ndarray): shape (Ti,) + train (bool): True, train mode. 
class LogMelSpectrogramKaldi_decay():
    """Kaldi-style log filterbank features via python_speech_features.logfbank.

    Args:
        fs (int): sample rate
        n_mels (int): number of mel filter banks
        n_fft (int): FFT size
        n_shift (int): frame shift in samples (10 ms at 16 kHz)
        win_length (int): frame length in samples (25 ms at 16 kHz)
        window (str): window type passed to logfbank
        fmin (int): lowest mel filter frequency
        fmax (int|None): highest mel filter frequency; defaults to fs / 2
        eps (float): numerical floor
        dither (float): dithering constant (train mode only)
    """

    def __init__(
            self,
            fs=16000,
            n_mels=80,
            n_fft=512,  # fft point
            n_shift=160,  # unit:sample, 10ms
            win_length=400,  # unit:sample, 25ms
            window="povey",
            fmin=20,
            fmax=None,
            eps=1e-10,
            dither=1.0):
        self.fs = fs
        self.n_mels = n_mels
        self.n_fft = n_fft
        if n_shift > win_length:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        # logfbank expects winlen/winstep in seconds (the old comments said
        # ms, but 160 / 16000 == 0.01 s).
        self.n_shift = n_shift / fs  # unit: s
        self.win_length = win_length / fs  # unit: s

        self.window = window
        self.fmin = fmin
        if fmax is None:
            fmax_ = self.fs / 2
        elif fmax > int(self.fs / 2):
            raise ValueError("fmax must not be greater than half of "
                             "sample rate.")
        else:
            # BUG FIX: a valid explicit fmax used to leave fmax_ unassigned,
            # which raised NameError on the next line.
            fmax_ = fmax
        self.fmax = fmax_

        self.eps = eps
        self.remove_dc_offset = True
        self.preemph = 0.97
        self.dither = dither  # only work in train mode

    def __repr__(self):
        return (
            "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
            "n_shift={n_shift}, win_length={win_length}, preemph={preemph}, window={window}, "
            "fmin={fmin}, fmax={fmax}, eps={eps}, dither={dither}))".format(
                name=self.__class__.__name__,
                fs=self.fs,
                n_mels=self.n_mels,
                n_fft=self.n_fft,
                n_shift=self.n_shift,
                preemph=self.preemph,
                win_length=self.win_length,
                window=self.window,
                fmin=self.fmin,
                fmax=self.fmax,
                eps=self.eps,
                dither=self.dither, ))

    def __call__(self, x, train):
        """Compute (T, n_mels) log filterbank features.

        Args:
            x (np.ndarray): waveform, shape (Ti,)
            train (bool): True, train mode (enables dithering)

        Raises:
            ValueError: not support (Ti, C)

        Returns:
            np.ndarray: (T, D)
        """
        dither = self.dither if train else 0.0
        if x.ndim != 1:
            raise ValueError("Not support x: [Time, Channel]")

        # FIX: np.sctypes was removed in NumPy 2.0; issubdtype covers the
        # same float16/32/64 cases.
        if np.issubdtype(x.dtype, np.floating):
            # PCM32 -> PCM16
            bits = np.iinfo(np.int16).bits
            x = x * 2**(bits - 1)

        # logfbank need PCM16 input
        y = logfbank(
            signal=x,
            samplerate=self.fs,
            winlen=self.win_length,  # unit: s
            winstep=self.n_shift,  # unit: s
            nfilt=self.n_mels,
            nfft=self.n_fft,
            lowfreq=self.fmin,
            highfreq=self.fmax,
            dither=dither,
            remove_dc_offset=self.remove_dc_offset,
            preemph=self.preemph,
            wintype=self.window)
        return y
:param str import_path: syntax 'module_name:class_name' + e.g., 'paddlespeech.s2t.models.u2:U2Model' + :param dict alias: shortcut for registered class + :return: imported class + """ + if import_path not in alias and ":" not in import_path: + raise ValueError( + "import_path should be one of {} or " + 'include ":", e.g. "paddlespeech.s2t.models.u2:U2Model" : ' + "{}".format(set(alias), import_path)) + if ":" not in import_path: + import_path = alias[import_path] + + module_name, objname = import_path.split(":") + m = importlib.import_module(module_name) + return getattr(m, objname) + + +class Transformation(): + """Apply some functions to the mini-batch + + Examples: + >>> kwargs = {"process": [{"type": "fbank", + ... "n_mels": 80, + ... "fs": 16000}, + ... {"type": "cmvn", + ... "stats": "data/train/cmvn.ark", + ... "norm_vars": True}, + ... {"type": "delta", "window": 2, "order": 2}]} + >>> transform = Transformation(kwargs) + >>> bs = 10 + >>> xs = [np.random.randn(100, 80).astype(np.float32) + ... for _ in range(bs)] + >>> xs = transform(xs) + """ + + def __init__(self, conffile=None): + if conffile is not None: + if isinstance(conffile, dict): + self.conf = copy.deepcopy(conffile) + else: + with io.open(conffile, encoding="utf-8") as f: + self.conf = yaml.safe_load(f) + assert isinstance(self.conf, dict), type(self.conf) + else: + self.conf = {"mode": "sequential", "process": []} + + self.functions = OrderedDict() + if self.conf.get("mode", "sequential") == "sequential": + for idx, process in enumerate(self.conf["process"]): + assert isinstance(process, dict), type(process) + opts = dict(process) + process_type = opts.pop("type") + class_obj = dynamic_import(process_type, import_alias) + # TODO(karita): assert issubclass(class_obj, TransformInterface) + try: + self.functions[idx] = class_obj(**opts) + except TypeError: + try: + signa = signature(class_obj) + except ValueError: + # Some function, e.g. 
built-in function, are failed + pass + else: + logging.error("Expected signature: {}({})".format( + class_obj.__name__, signa)) + raise + else: + raise NotImplementedError( + "Not supporting mode={}".format(self.conf["mode"])) + + def __repr__(self): + rep = "\n" + "\n".join(" {}: {}".format(k, v) + for k, v in self.functions.items()) + return "{}({})".format(self.__class__.__name__, rep) + + def __call__(self, xs, uttid_list=None, **kwargs): + """Return new mini-batch + + :param Union[Sequence[np.ndarray], np.ndarray] xs: + :param Union[Sequence[str], str] uttid_list: + :return: batch: + :rtype: List[np.ndarray] + """ + if not isinstance(xs, Sequence): + is_batch = False + xs = [xs] + else: + is_batch = True + + if isinstance(uttid_list, str): + uttid_list = [uttid_list for _ in range(len(xs))] + + if self.conf.get("mode", "sequential") == "sequential": + for idx in range(len(self.conf["process"])): + func = self.functions[idx] + + # TODO(karita): use TrainingTrans and UttTrans to check __call__ args + # Derive only the args which the func has + try: + param = signature(func).parameters + except ValueError: + # Some function, e.g. 
built-in function, are failed + param = {} + _kwargs = {k: v for k, v in kwargs.items() if k in param} + try: + if uttid_list is not None and "uttid" in param: + xs = [ + func(x, u, **_kwargs) + for x, u in zip(xs, uttid_list) + ] + else: + xs = [func(x, **_kwargs) for x in xs] + + except Exception: + logging.fatal("Catch a exception from {}th func: {}".format( + idx, func)) + raise + else: + raise NotImplementedError( + "Not supporting mode={}".format(self.conf["mode"])) + + if is_batch: + return xs + else: + return xs[0] diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/utils/__init__.py b/models/speech/speech_recognition/deepspeech2/ixrt/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..309f97ed17605a7ee8b945efb9c90bc7245f7286 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/utils/__init__.py @@ -0,0 +1,16 @@ +from .load_tensorrt import deepspeech2_trtapi_ixrt, setup_io_bindings + + +VOCABLIST = ['', + '', + "'", + '', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + ''] + + +def check_target(inference, target): + satisfied = False + if inference > target: + satisfied = True + return satisfied diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/utils/error_rate.py b/models/speech/speech_recognition/deepspeech2/ixrt/utils/error_rate.py new file mode 100644 index 0000000000000000000000000000000000000000..1e81f9111703bfba5a5ffb73905dd7d4376e2f39 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/utils/error_rate.py @@ -0,0 +1,351 @@ +"""This module provides functions to calculate error rate in different level. +e.g. wer for word-level, cer for char-level. 
+""" +from itertools import groupby + +import editdistance +import numpy as np + +__all__ = ['word_errors', 'char_errors', 'wer', 'cer', "ErrorCalculator"] + + +def _levenshtein_distance(ref, hyp): + """Levenshtein distance is a string metric for measuring the difference + between two sequences. Informally, the levenshtein disctance is defined as + the minimum number of single-character edits (substitutions, insertions or + deletions) required to change one word into the other. We can naturally + extend the edits to word level when calculate levenshtein disctance for + two sentences. + """ + m = len(ref) + n = len(hyp) + + # special case + if ref == hyp: + return 0 + if m == 0: + return n + if n == 0: + return m + + if m < n: + ref, hyp = hyp, ref + m, n = n, m + + # use O(min(m, n)) space + distance = np.zeros((2, n + 1), dtype=np.int32) + + # initialize distance matrix + for j in range(n + 1): + distance[0][j] = j + + # calculate levenshtein distance + for i in range(1, m + 1): + prev_row_idx = (i - 1) % 2 + cur_row_idx = i % 2 + distance[cur_row_idx][0] = i + for j in range(1, n + 1): + if ref[i - 1] == hyp[j - 1]: + distance[cur_row_idx][j] = distance[prev_row_idx][j - 1] + else: + s_num = distance[prev_row_idx][j - 1] + 1 + i_num = distance[cur_row_idx][j - 1] + 1 + d_num = distance[prev_row_idx][j] + 1 + distance[cur_row_idx][j] = min(s_num, i_num, d_num) + + return distance[m % 2][n] + + +def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): + """Compute the levenshtein distance between reference sequence and + hypothesis sequence in word-level. + + :param reference: The reference sentence. + :type reference: str + :param hypothesis: The hypothesis sentence. + :type hypothesis: str + :param ignore_case: Whether case-sensitive or not. + :type ignore_case: bool + :param delimiter: Delimiter of input sentences. + :type delimiter: char + :return: Levenshtein distance and word number of reference sentence. 
:rtype: tuple
code-block:: text + + Sw is the number of words subsituted, + Dw is the number of words deleted, + Iw is the number of words inserted, + Nw is the number of words in the reference + + We can use levenshtein distance to calculate WER. Please draw an attention + that empty items will be removed when splitting sentences by delimiter. + + :param reference: The reference sentence. + :type reference: str + :param hypothesis: The hypothesis sentence. + :type hypothesis: str + :param ignore_case: Whether case-sensitive or not. + :type ignore_case: bool + :param delimiter: Delimiter of input sentences. + :type delimiter: char + :return: Word error rate. + :rtype: float + :raises ValueError: If word number of reference is zero. + """ + edit_distance, ref_len = word_errors(reference, hypothesis, ignore_case, + delimiter) + + if ref_len == 0: + raise ValueError("Reference's word number should be greater than 0.") + + wer = float(edit_distance) / ref_len + return wer + + +def cer(reference, hypothesis, ignore_case=False, remove_space=False): + """Calculate charactor error rate (CER). CER compares reference text and + hypothesis text in char-level. CER is defined as: + + .. math:: + CER = (Sc + Dc + Ic) / Nc + + where + + .. code-block:: text + + Sc is the number of characters substituted, + Dc is the number of characters deleted, + Ic is the number of characters inserted + Nc is the number of characters in the reference + + We can use levenshtein distance to calculate CER. Chinese input should be + encoded to unicode. Please draw an attention that the leading and tailing + space characters will be truncated and multiple consecutive space + characters in a sentence will be replaced by one space character. + + :param reference: The reference sentence. + :type reference: str + :param hypothesis: The hypothesis sentence. + :type hypothesis: str + :param ignore_case: Whether case-sensitive or not. 
+ :type ignore_case: bool + :param remove_space: Whether remove internal space characters + :type remove_space: bool + :return: Character error rate. + :rtype: float + :raises ValueError: If the reference length is zero. + """ + edit_distance, ref_len = char_errors(reference, hypothesis, ignore_case, + remove_space) + + if ref_len == 0: + raise ValueError("Length of reference should be greater than 0.") + + cer = float(edit_distance) / ref_len + return cer + + +class ErrorCalculator(): + """Calculate CER and WER for E2E_ASR and CTC models during training. + + :param y_hats: numpy array with predicted text + :param y_pads: numpy array with true (target) text + :param char_list: List[str] + :param sym_space: + :param sym_blank: + :return: + """ + + def __init__(self, + char_list, + sym_space, + sym_blank, + report_cer=False, + report_wer=False): + """Construct an ErrorCalculator object.""" + super().__init__() + + self.report_cer = report_cer + self.report_wer = report_wer + + self.char_list = char_list + self.space = sym_space + self.blank = sym_blank + self.idx_blank = self.char_list.index(self.blank) + if self.space in self.char_list: + self.idx_space = self.char_list.index(self.space) + else: + self.idx_space = None + + def __call__(self, ys_hat, ys_pad, is_ctc=False): + """Calculate sentence-level WER/CER score. 
+ + :param paddle.Tensor ys_hat: prediction (batch, seqlen) + :param paddle.Tensor ys_pad: reference (batch, seqlen) + :param bool is_ctc: calculate CER score for CTC + :return: sentence-level WER score + :rtype float + :return: sentence-level CER score + :rtype float + """ + cer, wer = None, None + if is_ctc: + return self.calculate_cer_ctc(ys_hat, ys_pad) + elif not self.report_cer and not self.report_wer: + return cer, wer + + seqs_hat, seqs_true = self.convert_to_char(ys_hat, ys_pad) + if self.report_cer: + cer = self.calculate_cer(seqs_hat, seqs_true) + + if self.report_wer: + wer = self.calculate_wer(seqs_hat, seqs_true) + return cer, wer + + def calculate_cer_ctc(self, ys_hat, ys_pad): + """Calculate sentence-level CER score for CTC. + + :param paddle.Tensor ys_hat: prediction (batch, seqlen) + :param paddle.Tensor ys_pad: reference (batch, seqlen) + :return: average sentence-level CER score + :rtype float + """ + cers, char_ref_lens = [], [] + for i, y in enumerate(ys_hat): + y_hat = [x[0] for x in groupby(y)] + y_true = ys_pad[i] + seq_hat, seq_true = [], [] + for idx in y_hat: + idx = int(idx) + if idx != -1 and idx != self.idx_blank and idx != self.idx_space: + seq_hat.append(self.char_list[int(idx)]) + + for idx in y_true: + idx = int(idx) + if idx != -1 and idx != self.idx_blank and idx != self.idx_space: + seq_true.append(self.char_list[int(idx)]) + + hyp_chars = "".join(seq_hat) + ref_chars = "".join(seq_true) + if len(ref_chars) > 0: + cers.append(editdistance.eval(hyp_chars, ref_chars)) + char_ref_lens.append(len(ref_chars)) + + cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None + return cer_ctc + + def convert_to_char(self, ys_hat, ys_pad): + """Convert index to character. 
+ + :param paddle.Tensor seqs_hat: prediction (batch, seqlen) + :param paddle.Tensor seqs_true: reference (batch, seqlen) + :return: token list of prediction + :rtype list + :return: token list of reference + :rtype list + """ + seqs_hat, seqs_true = [], [] + for i, y_hat in enumerate(ys_hat): + y_true = ys_pad[i] + eos_true = np.where(y_true == -1)[0] + ymax = eos_true[0] if len(eos_true) > 0 else len(y_true) + # NOTE: padding index (-1) in y_true is used to pad y_hat + seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]] + seq_true = [ + self.char_list[int(idx)] for idx in y_true if int(idx) != -1 + ] + seq_hat_text = "".join(seq_hat).replace(self.space, " ") + seq_hat_text = seq_hat_text.replace(self.blank, "") + seq_true_text = "".join(seq_true).replace(self.space, " ") + seqs_hat.append(seq_hat_text) + seqs_true.append(seq_true_text) + return seqs_hat, seqs_true + + def calculate_cer(self, seqs_hat, seqs_true): + """Calculate sentence-level CER score. + + :param list seqs_hat: prediction + :param list seqs_true: reference + :return: average sentence-level CER score + :rtype float + """ + char_eds, char_ref_lens = [], [] + for i, seq_hat_text in enumerate(seqs_hat): + seq_true_text = seqs_true[i] + hyp_chars = seq_hat_text.replace(" ", "") + ref_chars = seq_true_text.replace(" ", "") + char_eds.append(editdistance.eval(hyp_chars, ref_chars)) + char_ref_lens.append(len(ref_chars)) + return float(sum(char_eds)) / sum(char_ref_lens) + + def calculate_wer(self, seqs_hat, seqs_true): + """Calculate sentence-level WER score. 
+ + :param list seqs_hat: prediction + :param list seqs_true: reference + :return: average sentence-level WER score + :rtype float + """ + word_eds, word_ref_lens = [], [] + for i, seq_hat_text in enumerate(seqs_hat): + seq_true_text = seqs_true[i] + hyp_words = seq_hat_text.split() + ref_words = seq_true_text.split() + word_eds.append(editdistance.eval(hyp_words, ref_words)) + word_ref_lens.append(len(ref_words)) + return float(sum(word_eds)) / sum(word_ref_lens) diff --git a/models/speech/speech_recognition/deepspeech2/ixrt/utils/load_tensorrt.py b/models/speech/speech_recognition/deepspeech2/ixrt/utils/load_tensorrt.py new file mode 100644 index 0000000000000000000000000000000000000000..164a939437b0f28c2d1c544850aead9cc3bb9e59 --- /dev/null +++ b/models/speech/speech_recognition/deepspeech2/ixrt/utils/load_tensorrt.py @@ -0,0 +1,56 @@ +import numpy as np +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart + + +def deepspeech2_trtapi_ixrt(engine_file): + datatype = tensorrt.DataType.FLOAT + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + with open(engine_file, "rb") as f, tensorrt.Runtime(logger) as runtime: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + 
"dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations + diff --git a/tests/model_info.json b/tests/model_info.json index 1eb8591fa3a56ccd51211c20095b16493a1344d7..18488e263d4db5fd703b36fa6cc07fe9ac21df72 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -10424,6 +10424,207 @@ "type": "inference", "hasDemo": false, "demoType": "" + }, + { + "display_name": "ViT", + "model_name": "vit", + "framework": "ixrt", + "release_version": "26.06", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "cv/classification", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/classification/vit/ixrt/", + "readme_file": "models/cv/classification/vit/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://local/vit.onnx", + "need_third_part": true, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "DeiT-B", + "model_name": "deit_b", + "framework": "ixrt", + "release_version": "26.06", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "cv/classification", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/classification/deit_b/ixrt/", + "readme_file": "models/cv/classification/deit_b/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + 
"datasets": "https://www.image-net.org/download.php", + "download_url": "https://local/deit_b.onnx", + "need_third_part": true, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "MobileNetV1", + "model_name": "mobilenet_v1", + "framework": "ixrt", + "release_version": "26.06", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "cv/classification", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/classification/mobilenet_v1/ixrt/", + "readme_file": "models/cv/classification/mobilenet_v1/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://local/mobilenet_v1.onnx", + "need_third_part": true, + "precisions": [ + "fp16", + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "DBNet", + "model_name": "dbnet", + "framework": "ixrt", + "release_version": "26.06", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "cv/ocr", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/ocr/dbnet/ixrt/", + "readme_file": "models/cv/ocr/dbnet/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/icdar2015", + "download_url": "http://local/dbnet.onnx", + "need_third_part": true, + "precisions": [ + "fp16", + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "DDRNet", + "model_name": "ddrnet", + "framework": "ixrt", + "release_version": "26.06", + "release_sdk": 
"4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "cv/semantic_segmentation", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/semantic_segmentation/ddrnet/ixrt/", + "readme_file": "models/cv/semantic_segmentation/ddrnet/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/cityscapes", + "download_url": "http://local/ddrnet.onnx", + "need_third_part": true, + "precisions": [ + "fp16", + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "DeepSpeech2", + "model_name": "deepspeech2", + "framework": "ixrt", + "release_version": "26.06", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "speech/speech_recognition", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/speech/speech_recognition/deepspeech2/ixrt/", + "readme_file": "models/speech/speech_recognition/deepspeech2/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/LibriSpeech", + "download_url": "http://local/deepspeech2.onnx", + "need_third_part": true, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" } ] } \ No newline at end of file diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 61a86fad7fbd82ced93b8fd005992e4d9c4c80fb..420ee2a6684248acce889d298dfe5d433fc06509 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -124,11 +124,11 @@ def main(): logging.info(f"End running {model['model_name']} test case.") # multi_object_tracking模型 - if model["category"] in ["cv/multi_object_tracking", "cv/semantic_segmentation", 
"cv/ocr", "multimodal/diffusion_model", "speech/speech_synthesis"]: + if model["category"] in ["cv/multi_object_tracking", "cv/semantic_segmentation", "cv/ocr", "multimodal/diffusion_model", "speech/speech_synthesis", "speech/speech_recognition"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_multi_object_tracking_testcase(model) + result = run_multi_object_tracking_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -507,7 +507,7 @@ def run_segmentation_and_face_testcase(model): logging.debug(f"matchs:\n{matchs}") return result -def run_multi_object_tracking_testcase(model): +def run_multi_object_tracking_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -527,6 +527,11 @@ def run_multi_object_tracking_testcase(model): ls -l | grep onnx """ + if model_name == "deepspeech2": + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'paddlepaddle-[^"]*\.whl' | head -n1` + """ + # add pip list info when in debug mode if utils.is_debug(): pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" @@ -567,9 +572,10 @@ def run_multi_object_tracking_testcase(model): result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]} pattern = METRIC_PATTERN matchs = re.findall(pattern, sout) - if matchs and len(matchs) == 1: - result["result"][prec].update(get_metric_result(matchs[0])) - result["result"][prec]["status"] = "PASS" + if matchs: + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + result["result"][prec]["status"] = "PASS" result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") return result