diff --git a/.github/workflow/issue_stale.yaml b/.github/workflow/issue_stale.yaml deleted file mode 100644 index 7c11c2b..0000000 --- a/.github/workflow/issue_stale.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: Close inactive issues -on: - schedule: - - cron: "35 11 * * 5" - -env: - DAYS_BEFORE_ISSUE_STALE: 30 - DAYS_BEFORE_ISSUE_CLOSE: 14 - -jobs: - close-issues: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - days-before-issue-stale: ${{ env.DAYS_BEFORE_ISSUE_STALE }} - days-before-issue-close: ${{ env.DAYS_BEFORE_ISSUE_CLOSE }} - stale-issue-label: "stale" - stale-issue-message: | - This issue is stale because it has been open for ${{ env.DAYS_BEFORE_ISSUE_STALE }} days with no activity. - It will be closed if no further activity occurs. Let us know if you still need help! - close-issue-message: | - This issue is being closed because it has been stale for ${{ env.DAYS_BEFORE_ISSUE_CLOSE }} days with no activity. - If you still need help, please feel free to leave comments. - days-before-pr-stale: -1 - days-before-pr-close: -1 - repo-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 7eed5e2..0000000 --- a/.gitignore +++ /dev/null @@ -1,28 +0,0 @@ - -*.json -logs/* -*.pyc - -tmp_sbatch.sh -*.log - -*.hydra - -*.h5 - -*.zip - -cancel.sh - -# slurm log files -*.err -*.out - -# figure -*.png -*.pdf - -# cuda build files -*.egg-info -build* -dist* \ No newline at end of file diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 0bd27a2..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "OpenSceneFlow"] - path = OpenSceneFlow - url = https://github.com/KTH-RPL/OpenSceneFlow.git diff --git a/OpenSceneFlow b/OpenSceneFlow deleted file mode 160000 index 6226b07..0000000 --- a/OpenSceneFlow +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6226b07784dbd6f1aa52931d419a7d3707b4e3d3 diff --git a/README.md b/README.md index 49fd33a..c31be02 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,36 @@ SeFlow: A Self-Supervised Scene Flow Method in Autonomous Driving --- -[![arXiv](https://img.shields.io/badge/arXiv-2407.01702-b31b1b?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2407.01702) -[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/seflow-a-self-supervised-scene-flow-method-in/self-supervised-scene-flow-estimation-on-1)](https://paperswithcode.com/sota/self-supervised-scene-flow-estimation-on-1?p=seflow-a-self-supervised-scene-flow-method-in) -[![poster](https://img.shields.io/badge/ECCV24|Poster-6495ed?style=flat&logo=Shotcut&logoColor=wihte)](https://hkustconnect-my.sharepoint.com/:b:/g/personal/qzhangcb_connect_ust_hk/EWyWD-tAX4xIma5U7ZQVk9cBVjsFv0Y_jAC2G7xAB-w4cg?e=c3FbMg) -[![video](https://img.shields.io/badge/video-YouTube-FF0000?logo=youtube&logoColor=white)](https://youtu.be/fQqx2IES-VI) +[arXiv coming soon] [poster comming soon] [viceo coming soon] -![](assets/docs/seflow_arch.png) +We got accepted! I will **update the whole code** around the DDL of the camera-ready version (**Jul'15**). Stay Tunned with us. -Task: __Self-Supervised__ Scene Flow Estimation in Autonomous Driving. No human-label needed. Real-time inference (15-20Hz in RTX3090). +Task: Self-Supervised Scene Flow Estimation in Autonomous Driving. -📜 2025/02/18: Merging all scene flow code to [OpenSceneFLow codebase](https://github.com/KTH-RPL/OpenSceneFlow) for afterward code maintenance. This repo saved README, [cluster slurm files](assets/slurm), and [quick core file](lossfunc.py) in SeFlow. The old source code branch is also [available here](https://github.com/KTH-RPL/SeFlow/tree/source). +**Scripts** quick view in our scripts (We directly follow our previous work [DeFlow code structure](https://github.com/KTH-RPL/DeFlow)): -2024/11/18 16:17: Update model and demo data download link through HuggingFace, Personally I found `wget` from HuggingFace link is much faster than Zenodo. +- `dataprocess/extract_*.py` : pre-process data before training to speed up the whole training time. + [Dataset we included now: Argoverse 2 and Waymo. more on the way: Nuscenes, custom data.] + +- `0_process.py`: process data with save dufomap, cluster labels inside file. -2024/09/26 16:24: All codes already uploaded and tested. You can to try training directly by downloading (through [HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow)/[Zenodo](https://zenodo.org/records/13744999)) demo data or pretrained weight for evaluation. +- `1_train.py`: Train the model and get model checkpoints. Pls remember to check the config. -Pre-trained weights for models are available in [Zenodo](https://zenodo.org/records/13744999)/[HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow) link. Check usage in [2. Evaluation](#2-evaluation) or [3. Visualization](#3-visualization). +- `2_eval.py` : Evaluate the model on the validation/test set. And also upload to online leaderboard. -## 0. Setup +- `3_vis.py` : For visualization of the results with a video. -**Environment**: Clone the repo and build the environment, check [detail installation](https://github.com/KTH-RPL/OpenSceneFlow/assets/README.md) for more information. [Conda](https://docs.conda.io/projects/miniconda/en/latest/)/[Mamba](https://github.com/mamba-org/mamba) is recommended. +## 0. Setup -```bash -git clone --recursive https://github.com/KTH-RPL/OpenSceneFlow.git -cd OpenSceneFlow -mamba env create -f environment.yaml -``` +**Environment**: Same to DeFlow. -CUDA package (need install nvcc compiler), the compile time is around 1-5 minutes: -```bash -mamba activate opensf -# CUDA already install in python environment. I also tested others version like 11.3, 11.4, 11.7, 11.8 all works -cd assets/cuda/mmcv && python ./setup.py install && cd ../../.. -cd assets/cuda/chamfer3D && python ./setup.py install && cd ../../.. -``` - -Or another environment setup choice is [Docker](https://en.wikipedia.org/wiki/Docker_(software)) which isolated environment, check more information in [OpenSceneFlow/assets/README.md](https://github.com/KTH-RPL/OpenSceneFlow/assets/README.md#docker-environment). ## 1. Run & Train -Note: Prepare raw data and process train data only needed run once for the task. No need repeat the data process steps till you delete all data. We use [wandb](https://wandb.ai/) to log the training process, and you may want to change all `entity="kth-rpl"` to your own entity. - -### Data Preparation - -Check [dataprocess/README.md](dataprocess/README.md#argoverse-20) for downloading tips for the raw Argoverse 2 dataset. Or maybe you want to have the **mini processed dataset** to try the code quickly, We directly provide one scene inside `train` and `val`. It already converted to `.h5` format and processed with the label data. -You can download it from [Zenodo](https://zenodo.org/records/13744999/files/demo_data.zip)/[HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow/blob/main/demo_data.zip) and extract it to the data folder. And then you can skip following steps and directly run the [training script](#train-the-model). - -```bash -wget https://huggingface.co/kin-zhang/OpenSceneFlow/resolve/main/demo_data.zip -unzip demo_data.zip -p /home/kin/data/av2 -``` - -#### Process train data - -Process train data for self-supervised learning. Only training data needs this step. [Runtime: Normally need 15 hours for my desktop, 3 hours for the cluster with five available nodes parallel running.] - -```bash -python process.py --data_dir /home/kin/data/av2/preprocess_v2/sensor/train --scene_range 0,701 -``` - -### Train the model - -Train SeFlow needed to specify the loss function, we set the config of our best model in the leaderboard. [Runtime: Around 11 hours in 4x A100 GPUs.] - -```bash -python train.py model=deflow lr=2e-4 epochs=9 batch_size=16 loss_fn=seflowLoss "add_seloss={chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0}" "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean" -``` +coming soon -Or you can directly download the pre-trained weight from [Zenodo](https://zenodo.org/records/13744999/files/seflow_best.ckpt)/[HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow/blob/main/seflow_best.zip) and skip the training step. - -### Other Benchmark Models - -You can also train the supervised baseline model in our paper with the following command. [Runtime: Around 10 hours in 4x A100 GPUs.] -```bash -python train.py model=fastflow3d lr=4e-5 epochs=20 batch_size=16 loss_fn=ff3dLoss -python train.py model=deflow lr=2e-4 epochs=20 batch_size=16 loss_fn=deflowLoss -``` - -> [!NOTE] -> You may found the different settings in the paper that is all methods are enlarge learning rate to 2e-4 and decrease the epochs to 20 for faster converge and better performance. -> However, we kept the setting on lr=2e-6 and 50 epochs in (SeFlow & DeFlow) paper experiments for the fair comparison with ZeroFlow where we directly use their provided weights. -> We suggest afterward researchers or users to use the setting here (larger lr and smaller epoch) for faster converge and better performance. - -## 2. Evaluation +## 2. Evaluation & Upload to Leaderboard You can view Wandb dashboard for the training and evaluation results or upload result to online leaderboard. @@ -92,64 +38,42 @@ Since in training, we save all hyper-parameters and model checkpoints, the only ```bash # downloaded pre-trained weight, or train by yourself -wget https://huggingface.co/kin-zhang/OpenSceneFlow/resolve/main/seflow_best.ckpt - -# it will directly prints all metric -python eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=val - -# it will output the av2_submit.zip or av2_submit_v2.zip for you to submit to leaderboard -python eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=test leaderboard_version=1 -python eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=test leaderboard_version=2 +wget TODO +python 2_eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=val # it will directly prints all metric +python 2_eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=test # it will output the av2_submit.zip for you to submit to leaderboard ``` -And the terminal will output the command for you to submit the result to the online leaderboard. You can follow [this section for evalai](https://github.com/KTH-RPL/DeFlow?tab=readme-ov-file#2-evaluation). - -Check all detailed result files (presented in our paper Table 1) in [this discussion](https://github.com/KTH-RPL/DeFlow/discussions/2). ## 3. Visualization -We provide a script to visualize the results of the model also. You can specify the checkpoint path and the data path to visualize the results. The step is quickly similar to evaluation. +We provide a script to visualize the results of the model. You can specify the checkpoint path and the data path to visualize the results. The step is quickly similar to evaluation. ```bash -python save.py checkpoint=/home/kin/seflow_best.ckpt dataset_path=/home/kin/data/av2/preprocess_v2/sensor/vis - -# The output of above command will be like: -Model: DeFlow, Checkpoint from: /home/kin/model_zoo/v2/seflow_best.ckpt -We already write the flow_est into the dataset, please run following commend to visualize the flow. Copy and paste it to your terminal: -python tools/visualization.py --res_name 'seflow_best' --data_dir /home/kin/data/av2/preprocess_v2/sensor/vis -Enjoy! ^v^ ------ # Then run the command in the terminal: -python tools/visualization.py --res_name 'seflow_best' --data_dir /home/kin/data/av2/preprocess_v2/sensor/vis +python tests/scene_flow.py --flow_mode 'deflow_best' --data_dir /home/kin/data/av2/preprocess/sensor/mini ``` -https://github.com/user-attachments/assets/f031d1a2-2d2f-4947-a01f-834ed1c146e6 - ## Cite & Acknowledgements ``` -@inproceedings{zhang2024seflow, +@article{zhang2024seflow, author={Zhang, Qingwen and Yang, Yi and Li, Peizheng and Andersson, Olov and Jensfelt, Patric}, - title={{SeFlow}: A Self-Supervised Scene Flow Method in Autonomous Driving}, - booktitle={European Conference on Computer Vision (ECCV)}, - year={2024}, - pages={353–369}, - organization={Springer}, - doi={10.1007/978-3-031-73232-4_20}, + title={SeFlow: A Self-Supervised Scene Flow Method in Autonomous Driving}, + journal={arXiv preprint arXiv:TODO}, + year={2024} } -@inproceedings{zhang2024deflow, +@article{zhang2024deflow, author={Zhang, Qingwen and Yang, Yi and Fang, Heng and Geng, Ruoyu and Jensfelt, Patric}, - booktitle={2024 IEEE International Conference on Robotics and Automation (ICRA)}, - title={{DeFlow}: Decoder of Scene Flow Network in Autonomous Driving}, - year={2024}, - pages={2105-2111}, - doi={10.1109/ICRA57147.2024.10610278} + title={DeFlow: Decoder of Scene Flow Network in Autonomous Driving}, + journal={arXiv preprint arXiv:2401.16122}, + year={2024} } ``` -💞 Thanks to RPL member: [Li Ling](https://www.kth.se/profile/liling) helps revise our SeFlow manuscript. Thanks to [Kyle Vedder](https://kylevedder.github.io), who kindly opened his code (ZeroFlow) including pre-trained weights, and discussed their result with us which helped this work a lot. - +Thanks to RPL member: Li Ling helps review our SeFlow manuscript. +Thanks to [Kyle Vedder (ZeroFlow)](https://github.com/kylevedder), who kindly opened his code including pre-trained weights, and discussed their result with us which helped this work a lot. This work was partially supported by the Wallenberg AI, Autonomous Systems and Software Program (WASP) funded by the Knut and Alice Wallenberg Foundation and Prosense (2020-02963) funded by Vinnova. The computations were enabled by the supercomputing resource Berzelius provided by National Supercomputer Centre at Linköping University and the Knut and Alice Wallenberg Foundation, Sweden. diff --git a/assets/docs/seflow_arch.png b/assets/docs/seflow_arch.png deleted file mode 100644 index 399a67a..0000000 Binary files a/assets/docs/seflow_arch.png and /dev/null differ diff --git a/assets/slurm/0_process.sh b/assets/slurm/0_process.sh deleted file mode 100644 index 2b0392b..0000000 --- a/assets/slurm/0_process.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -#SBATCH -J pack_data -#SBATCH --gpus 0 -#SBATCH --cpus-per-task 64 -#SBATCH --mem 256G -#SBATCH --mincpus=64 -#SBATCH -t 1-00:00:00 -#SBATCH --mail-type=END,FAIL -#SBATCH --mail-user=qingwen@kth.se -#SBATCH --output /proj/berzelius-2023-154/users/x_qinzh/workspace/SeFlow/logs/slurm/%J_data.out -#SBATCH --error /proj/berzelius-2023-154/users/x_qinzh/workspace/SeFlow/logs/slurm/%J_data.err - -cd /proj/berzelius-2023-154/users/x_qinzh/workspace/SeFlow -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/proj/berzelius-2023-154/users/x_qinzh/mambaforge/lib -# export HYDRA_FULL_ERROR=1 - -/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/dataprocess/bin/python dataprocess/extract_av2.py --nproc 64 \ - --av2_type sensor \ - --data_mode train \ - --argo_dir /proj/berzelius-2023-154/users/x_qinzh/av2 \ - --output_dir /proj/berzelius-2023-364/users/x_qinzh/data/av2/preprocess_v2 - -/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/dataprocess/bin/python dataprocess/extract_av2.py --nproc 64 \ - --av2_type sensor \ - --data_mode val \ - --argo_dir /proj/berzelius-2023-154/users/x_qinzh/av2 \ - --output_dir /proj/berzelius-2023-364/users/x_qinzh/data/av2/preprocess_v2 \ - --mask_dir /proj/berzelius-2023-154/users/x_qinzh/av2/3d_scene_flow - -/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/dataprocess/bin/python dataprocess/extract_av2.py --nproc 64 \ - --av2_type sensor \ - --data_mode test \ - --argo_dir /proj/berzelius-2023-154/users/x_qinzh/av2 \ - --output_dir /proj/berzelius-2023-364/users/x_qinzh/data/av2/preprocess_v2 \ - --mask_dir /proj/berzelius-2023-154/users/x_qinzh/av2/3d_scene_flow \ No newline at end of file diff --git a/assets/slurm/1_train.sh b/assets/slurm/1_train.sh deleted file mode 100644 index 72c0d92..0000000 --- a/assets/slurm/1_train.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -#SBATCH -J seflow -#SBATCH --gpus 4 -C "fat" -#SBATCH -t 3-00:00:00 -#SBATCH --mail-type=END,FAIL -#SBATCH --mail-user=qingwen@kth.se -#SBATCH --output /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/%J_seflow.out -#SBATCH --error /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/%J_seflow.err - -cd /proj/berzelius-2023-154/users/x_qinzh/seflow - -SOURCE="/proj/berzelius-2023-154/users/x_qinzh/data/av2/preprocess_v2" -DEST="/scratch/local/av2" -SUBDIRS=("sensor/train" "sensor/val") - -start_time=$(date +%s) -for dir in "${SUBDIRS[@]}"; do - mkdir -p "${DEST}/${dir}" - find "${SOURCE}/${dir}" -type f -print0 | xargs -0 -n1 -P16 cp -t "${DEST}/${dir}" & -done -wait -end_time=$(date +%s) -elapsed=$((end_time - start_time)) -echo "Copy ${SOURCE} to ${DEST} Total time: ${elapsed} seconds" -echo "Start training..." - -# ====> paper model = seflow_official -# /proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python train.py \ -# slurm_id=$SLURM_JOB_ID wandb_mode=online train_data=/scratch/local/av2/sensor/train val_data=/scratch/local/av2/sensor/val \ -# num_workers=16 model=deflow lr=2e-6 epochs=50 batch_size=20 "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean" \ -# loss_fn=seflowLoss "add_seloss={chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0}" - -# ====> leaderboard model = seflow_best -/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python train.py \ - slurm_id=$SLURM_JOB_ID wandb_mode=online train_data=/scratch/local/av2/sensor/train val_data=/scratch/local/av2/sensor/val \ - num_workers=16 model=deflow lr=2e-4 epochs=9 batch_size=16 "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean" \ - loss_fn=seflowLoss "add_seloss={chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0}" diff --git a/assets/slurm/2_eval.sh b/assets/slurm/2_eval.sh deleted file mode 100644 index 30684cd..0000000 --- a/assets/slurm/2_eval.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH -J eval -#SBATCH --gpus 1 -#SBATCH -t 01:00:00 -#SBATCH --output /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/%J_eval.out -#SBATCH --error /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/%J_eval.err - -cd /proj/berzelius-2023-154/users/x_qinzh/seflow - -SOURCE="/proj/berzelius-2023-154/users/x_qinzh/av2/preprocess_v2" -DEST="/scratch/local/av2" -SUBDIRS=("sensor/val") - -start_time=$(date +%s) -for dir in "${SUBDIRS[@]}"; do - mkdir -p "${DEST}/${dir}" - find "${SOURCE}/${dir}" -type f -print0 | xargs -0 -n1 -P16 cp -t "${DEST}/${dir}" & -done -wait -end_time=$(date +%s) -elapsed=$((end_time - start_time)) -echo "Copy ${SOURCE} to ${DEST} Total time: ${elapsed} seconds" -echo "Start training..." - -# ====> leaderboard model -# /proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python eval.py \ -# wandb_mode=online dataset_path=/scratch/local/av2/sensor \ -# checkpoint=/proj/berzelius-2023-154/users/x_qinzh/seflow/logs/wandb/seflow-10086990/checkpoints/epoch_19_seflow.ckpt \ -# av2_mode=test save_res=True - -/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python eval.py \ - wandb_mode=online dataset_path=/scratch/local/av2/sensor av2_mode=val \ - checkpoint=/proj/berzelius-2023-154/users/x_qinzh/seflow/logs/wandb/seflow-10086990/checkpoints/epoch_19_seflow.ckpt \ No newline at end of file diff --git a/assets/slurm/dufolabel_sbatch.py b/assets/slurm/dufolabel_sbatch.py deleted file mode 100644 index 1a10ebc..0000000 --- a/assets/slurm/dufolabel_sbatch.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -# Created: 2023-11-30 17:02 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# -# Description: Write sbatch files for DUFO jobs on cluster (SLURM), no GPU needed -# Reference: -# * ZeroFlow data sbatch: https://github.com/kylevedder/zeroflow/blob/master/data_prep_scripts/split_nsfp_jobs_sbatch.py - -# Run with following commands (only train need to be processed with dufo label) -- python assets/slurm/dufolabel_sbatch.py --split 50 --total 700 --interval 1 --data_dir /home/kin/data/av2/preprocess/sensor/train --data_mode train -- python assets/slurm/dufolabel_sbatch.py --split 100 --total 800 --interval 2 --data_dir /proj/berzelius-2023-154/users/x_qinzh/dataset/waymo/fix_preprocess/train -""" - -import fire, time, os -def main( - data_dir: str = "/proj/berzelius-2023-154/users/x_qinzh/av2/preprocess/sensor/train", - split: int = 50, - total: int = 2001, - interval: int = 1, -): - # +1 因为range是左闭右开 - for i in range(0, total+1, split): - scene_range = [i , min(i + split, total+1)] - print(scene_range) - sbatch_file_content = \ - f"""#!/bin/bash -#SBATCH -J pack_{scene_range[0]}_{scene_range[1]} -#SBATCH --gpus 0 -#SBATCH --cpus-per-task 32 -#SBATCH --mem 64G -#SBATCH --mincpus=32 -#SBATCH -t 1-00:00:00 -#SBATCH --mail-type=END,FAIL -#SBATCH --mail-user=qingwen@kth.se -#SBATCH --output /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/0_lidar/%J_{scene_range[0]}_{scene_range[1]}.out -#SBATCH --error /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/0_lidar/%J_{scene_range[0]}_{scene_range[1]}.err - -cd /proj/berzelius-2023-154/users/x_qinzh/seflow -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/proj/berzelius-2023-154/users/x_qinzh/mambaforge/lib - -/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python process.py \\ - --data_dir {data_dir} \\ - --interval {interval} \\ - --scene_range {scene_range[0]},{scene_range[1]} - - """ - # run command sh sbatch_file_content - with open(f"tmp_sbatch.sh", "w") as f: - f.write(sbatch_file_content) - print(f"Write sbatch file: tmp_sbatch.sh") - os.system(f"sbatch tmp_sbatch.sh") - -if __name__ == '__main__': - start_time = time.time() - fire.Fire(main) - print(f"\nTime used: {(time.time() - start_time)/60:.2f} mins") \ No newline at end of file diff --git a/lossfunc.py b/lossfunc.py deleted file mode 100644 index cd161d9..0000000 --- a/lossfunc.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -# Created: 2023-07-17 00:00 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow) and SeFlow (https://github.com/KTH-RPL/SeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. -# -# Description: Define the loss function for training. -""" -import torch -from assets.cuda.chamfer3D import nnChamferDis -MyCUDAChamferDis = nnChamferDis() -from src.utils.av2_eval import CATEGORY_TO_INDEX, BUCKETED_METACATAGORIES - -# NOTE(Qingwen 24/07/06): squared, so it's sqrt(4) = 2m, in 10Hz the vel = 20m/s ~ 72km/h -# If your scenario is different, may need adjust this TRUNCATED to 80-120km/h vel. -TRUNCATED_DIST = 4 - - -def seflowLoss(res_dict, timer=None): - pc0_label = res_dict['pc0_labels'] - pc1_label = res_dict['pc1_labels'] - - pc0 = res_dict['pc0'] - pc1 = res_dict['pc1'] - - est_flow = res_dict['est_flow'] - - pseudo_pc1from0 = pc0 + est_flow - - unique_labels = torch.unique(pc0_label) - pc0_dynamic = pc0[pc0_label > 0] - pc1_dynamic = pc1[pc1_label > 0] - # fpc1_dynamic = pseudo_pc1from0[pc0_label > 0] - # NOTE(Qingwen): since we set THREADS_PER_BLOCK is 256 - have_dynamic_cluster = (pc0_dynamic.shape[0] > 256) & (pc1_dynamic.shape[0] > 256) - - # first item loss: chamfer distance - # timer[5][1].start("MyCUDAChamferDis") - # raw: pc0 to pc1, est: pseudo_pc1from0 to pc1, idx means the nearest index - est_dist0, est_dist1, _, _ = MyCUDAChamferDis.disid_res(pseudo_pc1from0, pc1) - raw_dist0, raw_dist1, raw_idx0, _ = MyCUDAChamferDis.disid_res(pc0, pc1) - chamfer_dis = torch.mean(est_dist0[est_dist0 <= TRUNCATED_DIST]) + torch.mean(est_dist1[est_dist1 <= TRUNCATED_DIST]) - # timer[5][1].stop() - - # second item loss: dynamic chamfer distance - # timer[5][2].start("DynamicChamferDistance") - dynamic_chamfer_dis = torch.tensor(0.0, device=est_flow.device) - if have_dynamic_cluster: - dynamic_chamfer_dis += MyCUDAChamferDis(pseudo_pc1from0[pc0_label>0], pc1_dynamic, truncate_dist=TRUNCATED_DIST) - # timer[5][2].stop() - - # third item loss: exclude static points' flow - # NOTE(Qingwen): add in the later part on label==0 - static_cluster_loss = torch.tensor(0.0, device=est_flow.device) - - # fourth item loss: same label points' flow should be the same - # timer[5][3].start("SameClusterLoss") - moved_cluster_loss = torch.tensor(0.0, device=est_flow.device) - moved_cluster_norms = torch.tensor([], device=est_flow.device) - for label in unique_labels: - mask = pc0_label == label - if label == 0: - # Eq. 6 in the paper - static_cluster_loss += torch.linalg.vector_norm(est_flow[mask, :], dim=-1).mean() - elif label > 0 and have_dynamic_cluster: - cluster_id_flow = est_flow[mask, :] - cluster_nnd = raw_dist0[mask] - if cluster_nnd.shape[0] <= 0: - continue - - # Eq. 8 in the paper - sorted_idxs = torch.argsort(cluster_nnd, descending=True) - nearby_label = pc1_label[raw_idx0[mask][sorted_idxs]] # nonzero means dynamic in label - non_zero_valid_indices = torch.nonzero(nearby_label > 0) - if non_zero_valid_indices.shape[0] <= 0: - continue - max_idx = sorted_idxs[non_zero_valid_indices.squeeze(1)[0]] - - # Eq. 9 in the paper - max_flow = pc1[raw_idx0[mask][max_idx]] - pc0[mask][max_idx] - - # Eq. 10 in the paper - moved_cluster_norms = torch.cat((moved_cluster_norms, torch.linalg.vector_norm((cluster_id_flow - max_flow), dim=-1))) - - if moved_cluster_norms.shape[0] > 0: - moved_cluster_loss = moved_cluster_norms.mean() # Eq. 11 in the paper - elif have_dynamic_cluster: - moved_cluster_loss = torch.mean(raw_dist0[raw_dist0 <= TRUNCATED_DIST]) + torch.mean(raw_dist1[raw_dist1 <= TRUNCATED_DIST]) - # timer[5][3].stop() - - res_loss = { - 'chamfer_dis': chamfer_dis, - 'dynamic_chamfer_dis': dynamic_chamfer_dis, - 'static_flow_loss': static_cluster_loss, - 'cluster_based_pc0pc1': moved_cluster_loss, - } - return res_loss - -def deflowLoss(res_dict): - pred = res_dict['est_flow'] - gt = res_dict['gt_flow'] - - mask_no_nan = (~gt.isnan() & ~pred.isnan() & ~gt.isinf() & ~pred.isinf()) - - pred = pred[mask_no_nan].reshape(-1, 3) - gt = gt[mask_no_nan].reshape(-1, 3) - - speed = gt.norm(dim=1, p=2) / 0.1 - # pts_loss = torch.norm(pred - gt, dim=1, p=2) - pts_loss = torch.linalg.vector_norm(pred - gt, dim=-1) - - weight_loss = 0.0 - speed_0_4 = pts_loss[speed < 0.4].mean() - speed_mid = pts_loss[(speed >= 0.4) & (speed <= 1.0)].mean() - speed_1_0 = pts_loss[speed > 1.0].mean() - if ~speed_1_0.isnan(): - weight_loss += speed_1_0 - if ~speed_0_4.isnan(): - weight_loss += speed_0_4 - if ~speed_mid.isnan(): - weight_loss += speed_mid - return {'loss': weight_loss} - -# ref from zeroflow loss class FastFlow3DDistillationLoss() -def zeroflowLoss(res_dict): - pred = res_dict['est_flow'] - gt = res_dict['gt_flow'] - mask_no_nan = (~gt.isnan() & ~pred.isnan() & ~gt.isinf() & ~pred.isinf()) - - pred = pred[mask_no_nan].reshape(-1, 3) - gt = gt[mask_no_nan].reshape(-1, 3) - - error = torch.linalg.vector_norm(pred - gt, dim=-1) - # gt_speed = torch.norm(gt, dim=1, p=2) * 10.0 - gt_speed = torch.linalg.vector_norm(gt, dim=-1) * 10.0 - - mins = torch.ones_like(gt_speed) * 0.1 - maxs = torch.ones_like(gt_speed) - importance_scale = torch.max(mins, torch.min(1.8 * gt_speed - 0.8, maxs)) - # error = torch.norm(pred - gt, dim=1, p=2) * importance_scale - error = error * importance_scale - return {'loss': error.mean()} - -# ref from zeroflow loss class FastFlow3DSupervisedLoss() -def ff3dLoss(res_dict): - pred = res_dict['est_flow'] - gt = res_dict['gt_flow'] - classes = res_dict['gt_classes'] - # error = torch.norm(pred - gt, dim=1, p=2) - error = torch.linalg.vector_norm(pred - gt, dim=-1) - is_foreground_class = (classes > 0) # 0 is background, ref: FOREGROUND_BACKGROUND_BREAKDOWN - background_scalar = is_foreground_class.float() * 0.9 + 0.1 - error = error * background_scalar - return {'loss': error.mean()}