From 64e96ccb138ab6cbb8bb480c5ddcd12b112beb51 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 9 Sep 2021 13:30:31 +0200 Subject: [PATCH 01/58] Update FUNDING.yml --- .github/FUNDING.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 750ef1a..7c1c5c4 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1 @@ github: mre -patreon: hellorust From 74a1c24082722797fbfee26da65e66d7a2b30a87 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 26 Dec 2021 23:32:55 +0100 Subject: [PATCH 02/58] Bump tinysearch version and Rust version for release (#148) --- Cargo.lock | 4 ++-- bin/Cargo.toml | 7 ++++--- engine/Cargo.toml | 6 +++--- shared/Cargo.toml | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5836004..81990de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "tinysearch" -version = "0.6.2" +version = "0.6.3" dependencies = [ "anyhow", "argh", @@ -480,7 +480,7 @@ dependencies = [ [[package]] name = "tinysearch-shared" -version = "0.6.2" +version = "0.6.3" dependencies = [ "bincode", "serde", diff --git a/bin/Cargo.toml b/bin/Cargo.toml index e73314e..1e87926 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -1,16 +1,17 @@ [package] name = "tinysearch" authors = ["Matthias Endler "] -version = "0.6.2" -edition = "2018" +version = "0.6.3" +edition = "2021" description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" documentation = "https://github.com/mre/tinysearch/blob/master/README.md" homepage = "https://github.com/mre/tinysearch" [dependencies] +tinysearch-shared = { path = "../shared", version = "0.6.3" } + argh = "0.1.5" -tinysearch-shared = { path = "../shared", version = "0.6.2" } log = "0.4.14" serde_json = "1.0.66" anyhow = "1.0.43" diff --git a/engine/Cargo.toml b/engine/Cargo.toml index 5fb1857..2da0dc5 100644 --- a/engine/Cargo.toml +++ b/engine/Cargo.toml @@ -1,8 +1,8 @@ 
[package] name = "tinysearch-engine" authors = ["Matthias Endler "] -version = "0.6.2" -edition = "2018" +version = "0.6.3" +edition = "2021" description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" documentation = "https://github.com/mre/tinysearch/blob/master/README.md" @@ -17,7 +17,7 @@ once_cell = "1.8.0" [dependencies.tinysearch-shared] path = "../shared" -version = "0.6.2" +version = "0.6.3" [dependencies.xorf] version = "0.7.2" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 546e643..c5c2621 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,8 +1,8 @@ [package] name = "tinysearch-shared" -version = "0.6.2" +version = "0.6.3" authors = ["Matthias Endler "] -edition = "2018" +edition = "2021" description = "Shared libraries for tinysearch - a tiny search engine for static websites" license = "Apache-2.0/MIT" From 18b0dc5d86902ac5e3266ca1dfa6787e54666663 Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 29 Dec 2021 02:59:58 +0100 Subject: [PATCH 03/58] Add release workflow --- .github/workflows/release.yml | 157 ++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..9f85ced --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,157 @@ +name: release +on: + release: + types: + - edited + - published + +env: + CARGO_TERM_COLOR: always + +jobs: + prepare: + name: Build release binary + runs-on: ubuntu-latest + outputs: + tag_name: ${{ steps.get_release.outputs.tag_name }} + upload_url: ${{ steps.get_release.outputs.upload_url }} + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Get release + id: get_release + uses: bruceadams/get-release@v1.2.2 + env: + GITHUB_TOKEN: ${{ github.token }} + + linux: + runs-on: ubuntu-latest + needs: prepare + strategy: + matrix: + target: + - x86_64-unknown-linux-gnu + - 
x86_64-unknown-linux-musl + - arm-unknown-linux-gnueabihf + - arm-unknown-linux-musleabi + - arm-unknown-linux-musleabihf + - aarch64-unknown-linux-gnu + - mips-unknown-linux-musl + - mipsel-unknown-linux-musl + fail-fast: false + steps: + - name: Install musl tools + if: ${{ contains(matrix.target, 'musl') }} + run: sudo apt-get install -y musl-tools + + - name: Install arm tools + if: ${{ contains(matrix.target, 'arm') }} + run: | + echo "GNU_PREFIX=arm-linux-gnueabihf-" >> $GITHUB_ENV + sudo apt-get install -y binutils-arm-linux-gnueabihf + + - name: Install aarch64 tools + if: ${{ contains(matrix.target, 'aarch64') }} + run: | + echo "GNU_PREFIX=aarch64-linux-gnu-" >> $GITHUB_ENV + sudo apt-get install -y binutils-aarch64-linux-gnu + + - uses: actions/checkout@v2 + + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + default: true + + - name: Build ${{ matrix.target }} + uses: actions-rs/cargo@v1 + with: + command: build + args: --release --target ${{ matrix.target }} + use-cross: true + + - name: Optimize and package binary + run: | + cd target/${{ matrix.target }}/release + ${GNU_PREFIX}strip tinysearch + chmod +x tinysearch + tar -c tinysearch | gzip > tinysearch.tar.gz + + - name: Upload binary + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + asset_name: tinysearch-${{needs.prepare.outputs.tag_name}}-${{ matrix.target }}.tar.gz + asset_path: target/${{ matrix.target }}/release/tinysearch.tar.gz + upload_url: ${{needs.prepare.outputs.upload_url}} + asset_content_type: application/gzip + + macos: + runs-on: macos-latest + needs: prepare + steps: + - uses: actions/checkout@v2 + + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + default: true + + - name: Build binary + uses: actions-rs/cargo@v1 + with: + command: build + args: --release + use-cross: true + + - name: Optimize and package binary + run: | + cd target/release + strip tinysearch + chmod 
+x tinysearch + mkdir dmg + mv tinysearch dmg/ + hdiutil create -fs HFS+ -srcfolder dmg -volname tinysearch tinysearch.dmg + + - name: Upload binary + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + asset_name: tinysearch-${{needs.prepare.outputs.tag_name}}-macos-x86_64.dmg + asset_path: target/release/tinysearch.dmg + upload_url: ${{needs.prepare.outputs.upload_url}} + asset_content_type: application/octet-stream + + windows: + runs-on: windows-latest + needs: prepare + steps: + - uses: actions/checkout@v2 + + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + default: true + + - name: Build binary + uses: actions-rs/cargo@v1 + with: + command: build + args: --release + use-cross: true + + - name: Upload binary + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + asset_name: tinysearch-${{needs.prepare.outputs.tag_name}}-windows-x86_64.exe + asset_path: target/release/tinysearch.exe + upload_url: ${{needs.prepare.outputs.upload_url}} + asset_content_type: application/octet-stream From 99ef133f10ca486d8317bf319dbb5d4d7b1eb9f0 Mon Sep 17 00:00:00 2001 From: expilo Date: Tue, 18 Jan 2022 10:07:38 +0100 Subject: [PATCH 04/58] Update README.md (#150) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 39d9214..e9a0bec 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,7 @@ The following websites use tinysearch: * [Matthias Endler's blog](https://endler.dev/2019/tinysearch/) * [OutOfCheeseError](https://out-of-cheese-error.netlify.app/) +* [Museum of Warsaw Archdiocese](https://maw.art.pl/cyfrowemaw/) Are you using tinysearch, too? Add your site here! 
From e00555de3d9b9378a19895c8f619b0cf7572af53 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 5 Apr 2022 11:50:38 +0200 Subject: [PATCH 05/58] Fix copying engine files (based on #153) (#154) --- bin/src/main.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/src/main.rs b/bin/src/main.rs index fc6202e..4db01ca 100644 --- a/bin/src/main.rs +++ b/bin/src/main.rs @@ -91,6 +91,9 @@ fn main() -> Result<(), Error> { debug!("Crate content extracted to {:?}/", &temp_dir); let engine_dir = temp_dir.path().join("engine"); + if !engine_dir.exists() { + fs::create_dir_all(&engine_dir)?; + } if !engine_dir.exists() { for path in fs::read_dir(out_path)? { println!("Name: {}", path.unwrap().path().display()) @@ -102,7 +105,7 @@ fn main() -> Result<(), Error> { } println!("Copying index into crate"); - fs::rename("storage", engine_dir.join("storage"))?; + fs::copy("storage", engine_dir.join("storage"))?; println!("Compiling WASM module using wasm-pack"); wasm_pack(&temp_dir.path().join("engine"), &out_path)?; From 14ef25ca7ec04566f259cc1baabd7e220dd4cfe7 Mon Sep 17 00:00:00 2001 From: Luis Pedraza Date: Tue, 12 Apr 2022 15:56:38 +0200 Subject: [PATCH 06/58] change default output folder to ./wasm_output instead of local dir (#156) moved default output directory creation to the argh struct --- .gitignore | 1 + bin/src/main.rs | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 7b6043c..76c9e2d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,6 @@ tinysearch_engine_bg.d.ts tinysearch_engine_bg.wasm.d.ts package.json pkg/ +wasm_output/ demo.html .vscode diff --git a/bin/src/main.rs b/bin/src/main.rs index 4db01ca..e7a196e 100644 --- a/bin/src/main.rs +++ b/bin/src/main.rs @@ -22,6 +22,11 @@ include!(concat!(env!("OUT_DIR"), "/engine.rs")); // Include a bare-bones HTML page that demonstrates how tinysearch is used static DEMO_HTML: &str = include_str!("../assets/demo.html"); +fn 
default_output_dir() -> PathBuf { + fs::create_dir_all("./wasm_output").unwrap(); + PathBuf::from("./wasm_output") +} + #[derive(FromArgs)] /// A tiny, static search engine for static websites struct Opt { @@ -33,9 +38,9 @@ struct Opt { #[argh(positional)] index: Option, - /// output path for WASM module (local directory by default) - #[argh(option, short = 'p', long = "path")] - out_path: Option, + /// output path for WASM module ("wasm_output" directory by default) + #[argh(option, short = 'p', long = "path", default = "default_output_dir()")] + out_path: PathBuf, /// optimize the output using binaryen #[argh(switch, short = 'o', long = "optimize")] @@ -72,10 +77,7 @@ fn main() -> Result<(), Error> { std::process::exit(0); } - let out_path = opt - .out_path - .unwrap_or_else(|| PathBuf::from(".")) - .canonicalize()?; + let out_path = opt.out_path.canonicalize()?; let index = opt.index.context("No index file specified")?; let posts: Posts = index::read(fs::read_to_string(index)?)?; @@ -92,7 +94,7 @@ fn main() -> Result<(), Error> { let engine_dir = temp_dir.path().join("engine"); if !engine_dir.exists() { - fs::create_dir_all(&engine_dir)?; + fs::create_dir_all(&engine_dir)?; } if !engine_dir.exists() { for path in fs::read_dir(out_path)? { From 4bb72ea6d903252228ec9bc066ae4dedb12c156f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Han=20Seung=20Min=20-=20=ED=95=9C=EC=8A=B9=EB=AF=BC?= Date: Tue, 7 Jun 2022 02:23:19 +0530 Subject: [PATCH 07/58] Docs: A better solution for generating search index for zola (#158) This solution involves using macros and a bit of recursion, but works well Basically in the previous solution only top-level pages were indexed, now pages from subsections are also indexed --- howto/zola.md | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/howto/zola.md b/howto/zola.md index c984317..d5c8dbb 100644 --- a/howto/zola.md +++ b/howto/zola.md @@ -2,22 +2,35 @@ 1. 
Create a template, which iterates over all pages and creates our JSON structure. -`templates/json.html`: +`macros/create_data.html`: ```liquid -{% set section = get_section(path="_index.md") %} - -[ +{%- macro from_section(section) -%} +{%- set section = get_section(path=section) -%} {%- for post in section.pages -%} -{% if not post.draft %} +{%- if not post.draft -%} { "title": {{ post.title | striptags | json_encode | safe }}, "url": {{ post.permalink | json_encode | safe }}, "body": {{ post.content | striptags | json_encode | safe }} -}{% if not loop.last %},{% endif %} -{% endif %} +} +{%- if not loop.last -%},{%- endif %} +{%- endif -%} {%- endfor -%} -] +{%- if section.subsections -%} +, +{%- for subsection in section.subsections -%} +{{ self::from_section(section=subsection) }} +{%- endfor -%} +{%- endif -%} +{%- endmacro from_section -%} +``` + +`templates/json.html`: + +```liquid +{%- import "macros/create_data.html" as create_data -%} +[{{ create_data::from_section(section="_index.md") }}] ``` 2. Create a static page using the template. 
From fbb5ecf3931aea67a176aeac93c9ddde399d7515 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 6 Aug 2022 18:33:15 +0200 Subject: [PATCH 08/58] Bump version --- bin/Cargo.toml | 2 +- shared/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/Cargo.toml b/bin/Cargo.toml index 1e87926..e240ab1 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tinysearch" authors = ["Matthias Endler "] -version = "0.6.3" +version = "0.6.4" edition = "2021" description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index c5c2621..fc4da09 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tinysearch-shared" -version = "0.6.3" +version = "0.6.4" authors = ["Matthias Endler "] edition = "2021" description = "Shared libraries for tinysearch - a tiny search engine for static websites" From 7de510342171fadab14f0d19d0755b1c54a5e47c Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 6 Aug 2022 18:38:35 +0200 Subject: [PATCH 09/58] Bump version --- .github/workflows/docker.yml | 2 +- Cargo.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f974fc4..f444baf 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@master - name: Publish to Registry - uses: elgohr/Publish-Docker-Github-Action@3.01 + uses: elgohr/Publish-Docker-Github-Action@4 with: name: tinysearch/cli username: ${{ secrets.DOCKER_USERNAME }} diff --git a/Cargo.lock b/Cargo.lock index 81990de..fae7042 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "tinysearch" -version = "0.6.3" +version = "0.6.4" dependencies = [ "anyhow", "argh", @@ -480,7 +480,7 @@ dependencies = [ [[package]] name = "tinysearch-shared" -version = "0.6.3" +version = 
"0.6.4" dependencies = [ "bincode", "serde", From fb5554db2ac096643a65f372ce1657811e01f055 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 6 Aug 2022 18:39:39 +0200 Subject: [PATCH 10/58] Bump version --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f444baf..9053151 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@master - name: Publish to Registry - uses: elgohr/Publish-Docker-Github-Action@4 + uses: elgohr/Publish-Docker-Github-Action@v4 with: name: tinysearch/cli username: ${{ secrets.DOCKER_USERNAME }} From 1928a1d5b1e24a59b6038c4588a65844b4294339 Mon Sep 17 00:00:00 2001 From: Jake G <106644+Jieiku@users.noreply.github.com> Date: Tue, 27 Sep 2022 09:20:27 -0700 Subject: [PATCH 11/58] meta field (#161) --- bin/src/index.rs | 1 + bin/src/storage.rs | 9 +++++---- shared/src/lib.rs | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/bin/src/index.rs b/bin/src/index.rs index 78e231b..301f9f1 100644 --- a/bin/src/index.rs +++ b/bin/src/index.rs @@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize}; pub struct Post { pub title: String, pub url: String, + pub meta: Option, pub body: Option, } diff --git a/bin/src/storage.rs b/bin/src/storage.rs index 9b8347a..b0c9979 100644 --- a/bin/src/storage.rs +++ b/bin/src/storage.rs @@ -81,7 +81,7 @@ pub fn prepare_posts(posts: Posts) -> HashMap> { let mut prepared: HashMap> = HashMap::new(); for post in posts { debug!("Analyzing {}", post.url); - prepared.insert((post.title, post.url), post.body); + prepared.insert((post.title, post.url, post.meta), post.body); } prepared } @@ -97,10 +97,11 @@ mod tests { let mut posts = HashMap::new(); posts.insert( ( - "Maybe You Don't Need Kubernetes, Or Excel - You Know".to_string(), - "".to_string(), + "Maybe You Don't Need Kubernetes, Or Excel - You 
Know".to_string(),//title + "".to_string(),//url + None,//meta ), - None, + None,//body ); let filters = generate_filters(posts).unwrap(); assert_eq!(filters.len(), 1); diff --git a/shared/src/lib.rs b/shared/src/lib.rs index 09e402e..86afffd 100644 --- a/shared/src/lib.rs +++ b/shared/src/lib.rs @@ -7,7 +7,8 @@ use std::collections::hash_map::DefaultHasher; type Title = String; type Url = String; -pub type PostId = (Title, Url); +type Meta = Option; +pub type PostId = (Title, Url, Meta); pub type PostFilter = (PostId, HashProxy); pub type Filters = Vec; From 5d98c355e4f6a0e8c7f67e5764cd0322883ecdef Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 27 Sep 2022 18:30:24 +0200 Subject: [PATCH 12/58] Publish latest version --- Cargo.lock | 4 ++-- bin/Cargo.toml | 5 +++-- engine/Cargo.toml | 4 ++-- shared/Cargo.toml | 5 ++++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fae7042..0e8e654 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "tinysearch" -version = "0.6.4" +version = "0.7.0" dependencies = [ "anyhow", "argh", @@ -480,7 +480,7 @@ dependencies = [ [[package]] name = "tinysearch-shared" -version = "0.6.4" +version = "0.7.0" dependencies = [ "bincode", "serde", diff --git a/bin/Cargo.toml b/bin/Cargo.toml index e240ab1..672e709 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -1,15 +1,16 @@ [package] name = "tinysearch" authors = ["Matthias Endler "] -version = "0.6.4" +version = "0.7.0" edition = "2021" description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" documentation = "https://github.com/mre/tinysearch/blob/master/README.md" homepage = "https://github.com/mre/tinysearch" +repository = "https://github.com/mre/tinysearch" [dependencies] -tinysearch-shared = { path = "../shared", version = "0.6.3" } +tinysearch-shared = { path = "../shared", version = "0.7.0" } argh = "0.1.5" log = "0.4.14" diff --git a/engine/Cargo.toml 
b/engine/Cargo.toml index 2da0dc5..54ae348 100644 --- a/engine/Cargo.toml +++ b/engine/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tinysearch-engine" authors = ["Matthias Endler "] -version = "0.6.3" +version = "0.7.0" edition = "2021" description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" @@ -17,7 +17,7 @@ once_cell = "1.8.0" [dependencies.tinysearch-shared] path = "../shared" -version = "0.6.3" +version = "0.7.0" [dependencies.xorf] version = "0.7.2" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index fc4da09..42cc843 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,10 +1,13 @@ [package] name = "tinysearch-shared" -version = "0.6.4" +version = "0.7.0" authors = ["Matthias Endler "] edition = "2021" description = "Shared libraries for tinysearch - a tiny search engine for static websites" license = "Apache-2.0/MIT" +documentation = "https://github.com/mre/tinysearch/blob/master/README.md" +homepage = "https://github.com/mre/tinysearch" +repository = "https://github.com/mre/tinysearch" [dependencies] bincode = "1.3.3" From 10c060bb5a496b699be33ad3108e1cb36760d308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?l=C3=A9on=20h?= Date: Mon, 3 Oct 2022 00:36:45 +0200 Subject: [PATCH 13/58] Github action documentation (#163) --- README.md | 14 ++++++++++++++ howto/hugo.md | 18 ++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e9a0bec..48ca46c 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,20 @@ docker build --build-arg RUST_IMAGE=rustlang/rust:nightly-alpine -t tinysearch/c - `TINY_REPO`: Overwrite repository of tinysearch - `TINY_BRANCH`: Overwrite tinysearch branch +## Github action + +To integrate tinysearch in continuous deployment pipelines, a [github action](https://github.com/marketplace/actions/tinysearch-action) is available. 
+ +```yaml +- name: Build tinysearch + uses: leonhfr/tinysearch-action@v1 + with: + index: public/index.json + output_dir: public/wasm + output_types: | + wasm +``` + ## Users diff --git a/howto/hugo.md b/howto/hugo.md index cd8250b..f63a551 100644 --- a/howto/hugo.md +++ b/howto/hugo.md @@ -25,6 +25,20 @@ The output file will be in `public/index.json` See https://forestry.io/blog/build-a-json-api-with-hugo/ for more info. -# Credits +## Github action + +If building and deploying your hugo website using Github actions, you can use [tinysearch-action](https://github.com/leonhfr/tinysearch-action#deploy-a-hugo-website-to-github-pages). + +```yaml +- name: Build tinysearch + uses: leonhfr/tinysearch-action@v1 + with: + index: public/index.json + output_dir: public/wasm + output_types: | + wasm +``` + +## Credits -Tutorial created by [@Lusitaniae](https://github.com/Lusitaniae); edited by [@lord-re](https://github.com/lord-re). +Tutorial created by [@Lusitaniae](https://github.com/Lusitaniae); edited by [@lord-re](https://github.com/lord-re) and [@leonhfr](https://github.com/leonhfr). 
From 2a89670f14122856da0f5c5c42b2b881471d5fe3 Mon Sep 17 00:00:00 2001 From: Pavel Pletenev Date: Mon, 24 Jul 2023 14:51:58 +0300 Subject: [PATCH 14/58] Fix wrong ignore path and create a lot of features (#168) * Fix wrong ignore path * Fix other issues * Fix CI * Update `.github/workflows/ci.yml` * Add more verbose error and try to fix for new wasm-pack version * Update dependencies * allow nightly failures * Format; lint --------- Signed-off-by: Pavel Pletenev Co-authored-by: Matthias Endler --- .github/actions/cached-toolchain/action.yml | 49 ++ .github/workflows/ci.yml | 143 ++-- .github/workflows/release.yml | 10 +- .gitignore | 2 +- Cargo.lock | 518 ++++++-------- Cargo.toml | 3 +- Makefile | 6 +- bin/Cargo.toml | 28 - bin/build.rs | 11 - bin/src/main.rs | 160 ----- bin/assets/demo.html => demo.html | 0 engine/.gitignore | 3 - engine/Cargo.lock | 260 +++++++ engine/src/lib.rs | 55 -- examples/yew-example-crate/.gitignore | 3 + examples/yew-example-crate/Cargo.lock | 672 ++++++++++++++++++ examples/yew-example-crate/Cargo.toml | 16 + examples/yew-example-crate/Trunk.toml | 13 + examples/yew-example-crate/index.html | 15 + examples/yew-example-crate/src/index.scss | 3 + examples/yew-example-crate/src/main.rs | 110 +++ examples/yew-example-storage/.gitignore | 3 + examples/yew-example-storage/Cargo.lock | 598 ++++++++++++++++ examples/yew-example-storage/Cargo.toml | 16 + examples/yew-example-storage/Trunk.toml | 10 + examples/yew-example-storage/index.html | 15 + examples/yew-example-storage/src/index.scss | 3 + examples/yew-example-storage/src/main.rs | 115 +++ report.json | 26 + shared/.gitignore | 3 - shared/Cargo.toml | 22 - shared/src/lib.rs | 48 -- testscript.sh | 6 + {bin => tinysearch}/.gitignore | 0 tinysearch/Cargo.toml | 58 ++ .../assets/crate}/Cargo.toml | 23 +- tinysearch/assets/crate/src/lib.rs | 27 + tinysearch/assets/demo.html | 72 ++ {bin => tinysearch}/assets/stopwords | 0 tinysearch/src/bin/tinysearch.rs | 425 +++++++++++ 
tinysearch/src/bin/utils/assets.rs | 14 + .../src => tinysearch/src/bin/utils}/index.rs | 0 tinysearch/src/bin/utils/mod.rs | 3 + .../src/bin/utils}/storage.rs | 21 +- tinysearch/src/lib.rs | 83 +++ 45 files changed, 2941 insertions(+), 730 deletions(-) create mode 100644 .github/actions/cached-toolchain/action.yml delete mode 100644 bin/Cargo.toml delete mode 100644 bin/build.rs delete mode 100644 bin/src/main.rs rename bin/assets/demo.html => demo.html (100%) delete mode 100644 engine/.gitignore create mode 100644 engine/Cargo.lock delete mode 100644 engine/src/lib.rs create mode 100644 examples/yew-example-crate/.gitignore create mode 100644 examples/yew-example-crate/Cargo.lock create mode 100644 examples/yew-example-crate/Cargo.toml create mode 100644 examples/yew-example-crate/Trunk.toml create mode 100644 examples/yew-example-crate/index.html create mode 100644 examples/yew-example-crate/src/index.scss create mode 100644 examples/yew-example-crate/src/main.rs create mode 100644 examples/yew-example-storage/.gitignore create mode 100644 examples/yew-example-storage/Cargo.lock create mode 100644 examples/yew-example-storage/Cargo.toml create mode 100644 examples/yew-example-storage/Trunk.toml create mode 100644 examples/yew-example-storage/index.html create mode 100644 examples/yew-example-storage/src/index.scss create mode 100644 examples/yew-example-storage/src/main.rs create mode 100644 report.json delete mode 100644 shared/.gitignore delete mode 100644 shared/Cargo.toml delete mode 100644 shared/src/lib.rs create mode 100755 testscript.sh rename {bin => tinysearch}/.gitignore (100%) create mode 100644 tinysearch/Cargo.toml rename {engine => tinysearch/assets/crate}/Cargo.toml (59%) create mode 100644 tinysearch/assets/crate/src/lib.rs create mode 100644 tinysearch/assets/demo.html rename {bin => tinysearch}/assets/stopwords (100%) create mode 100644 tinysearch/src/bin/tinysearch.rs create mode 100644 tinysearch/src/bin/utils/assets.rs rename {bin/src => 
tinysearch/src/bin/utils}/index.rs (100%) create mode 100644 tinysearch/src/bin/utils/mod.rs rename {bin/src => tinysearch/src/bin/utils}/storage.rs (87%) create mode 100644 tinysearch/src/lib.rs diff --git a/.github/actions/cached-toolchain/action.yml b/.github/actions/cached-toolchain/action.yml new file mode 100644 index 0000000..5ee6944 --- /dev/null +++ b/.github/actions/cached-toolchain/action.yml @@ -0,0 +1,49 @@ +name: cached-toolchain +description: Install and cache toolchain + +inputs: + toolchain: + description: Rust toolchain specification -- see https://rust-lang.github.io/rustup/concepts/toolchains.html#toolchain-specification + required: true + targets: + description: Comma-separated list of target triples to install for this toolchain + required: false + +outputs: + cachekey: + description: A short hash of the rustc version, appropriate for use as a cache key. "20220627a831" + value: ${{steps.toolchain.outputs.cachekey}} + +runs: + using: composite + steps: + - uses: actions/cache/restore@v3 + with: + path: | + ~/.cargo/bin/rustup + ~/.rustup + key: ${{ runner.os }}-toolchain-${{ inputs.toolchain }}- + restore-keys: | + ${{ runner.os }}-toolchain-${{ inputs.toolchain }}- + + - shell: bash + run: + mkdir -p "${CARGO_HOME:-$HOME/.cargo}/bin" + echo "${CARGO_HOME:-$HOME/.cargo}/bin" >> $GITHUB_PATH + if: runner.os != 'Windows' + + - uses: dtolnay/rust-toolchain@e12eda571dc9a5ee5d58eecf4738ec291c66f295 # latest for 2023.03.10 + id: toolchain + with: + toolchain: ${{ inputs.toolchain }} + # enable clippy only only on stable + components: ${{ inputs.toolchain == 'stable' && 'clippy' || '' }} + # additional targets + targets: wasm32-unknown-unknown${{ inputs.targets && ',' || '' }}${{ inputs.targets && ',' || '' }} + + - uses: actions/cache/save@v3 + with: + path: | + ~/.cargo/bin/rustup + ~/.rustup + key: ${{ runner.os }}-toolchain-${{ inputs.toolchain }}-${{ steps.toolchain.outputs.cachekey }} \ No newline at end of file diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 790dab4..0f6b5b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,31 +21,84 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: ./.github/actions/cached-toolchain + id: toolchain with: - toolchain: stable - - - name: Run cargo test - uses: actions-rs/cargo@v1 + toolchain: 'stable' + + - uses: actions/cache@v3 with: - command: test + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target + key: test-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}-toolchain-${{ steps.toolchain.outputs.cachekey }} + restore-keys: | + test-${{ runner.os }}-cargo- + + - run: cargo test --all-features build: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: rust: [stable, beta, nightly] steps: - uses: actions/checkout@v1 + + - uses: ./.github/actions/cached-toolchain + id: toolchain + with: + toolchain: ${{matrix.rust}} - - name: Set Rust version - env: - RUST_VERSION: ${{ matrix.rust }} - run: rustup default ${RUST_VERSION} + - uses: actions/cache/restore@v3 # fetched cargo cache + id: cache-cargo-install-restore + with: + path: | + ~/.cargo + key: build-tools-${{matrix.rust}}-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}-toolchain-${{ steps.toolchain.outputs.cachekey }} + restore-keys: | + build-tools-${{matrix.rust}}-${{ runner.os }}-cargo- - - if: matrix.rust == 'stable' - run: rustup component add clippy + - name: Install wasm-pack + run: + cargo install --git https://github.com/mre/wasm-pack.git --branch first-class-bins + + - name: Print wasm-pack version + run: wasm-pack --version + + - name: Install trunk + run: + cargo install --locked trunk@0.17.2 # latest on 2023.07.24 + + - name: Print trunk version + run: trunk --version + + - uses: actions/cache/save@v3 # saves cargo cache + with: + path: | + ~/.cargo + key: ${{ steps.cache-cargo-install-restore.outputs.cache-primary-key 
}} + + - uses: actions/cache@v3 # fetched cargo cache + id: cache-cargo-restore + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target + key: build-${{matrix.rust}}-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}-toolchain-${{ steps.toolchain.outputs.cachekey }} + restore-keys: | + build-${{matrix.rust}}-${{ runner.os }}-cargo- + + - name: cargo fetch + run: cargo fetch --locked - if: matrix.rust == 'stable' uses: actions-rs/clippy-check@v1 @@ -53,18 +106,20 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features - - name: Install wasm-pack - run: - cargo install --force --git https://github.com/mre/wasm-pack.git --branch first-class-bins - - - name: Print wasm-pack version - run: wasm-pack --version - - name: Build and install tinysearch - run: cargo install --force --path bin + run: cargo install --force --path tinysearch --features=bin + continue-on-error: ${{ matrix.rust == 'nightly' }} - name: Build WASM package from test index - run: RUST_LOG=debug tinysearch fixtures/index.json + run: RUST_LOG=debug tinysearch -e 'path= "'$PWD'/tinysearch"' fixtures/index.json + continue-on-error: ${{ matrix.rust == 'nightly' }} + + - name: Build examples + run: + for example in `ls -1 examples`; do + (cd "examples/$example" && trunk build index.html) || exit 1; + done + continue-on-error: ${{ matrix.rust == 'nightly' }} - name: Audit for Security Vulnerabilities uses: actions-rs/audit-check@v1 @@ -77,33 +132,6 @@ jobs: command: doc args: --all-features --no-deps - publish-check: - if: startsWith(github.ref, 'refs/tags/') != true - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - - - name: cargo fetch - uses: actions-rs/cargo@v1 - with: - command: fetch - - # Note that we don't check `bin` here as it depends on `shared` - # to be published (which we don't do during dry-run). 
- # This could be fixed by allowing publish relative paths. - # https://github.com/rust-lang/cargo/issues/6738 - # https://github.com/rust-lang/cargo/issues/1565 - - name: Publish check for shared - uses: actions-rs/cargo@v1 - with: - command: publish - args: --dry-run --manifest-path shared/Cargo.toml - publish: if: startsWith(github.ref, 'refs/tags/') needs: @@ -117,24 +145,7 @@ jobs: with: command: fetch - - name: Publish shared - uses: actions-rs/cargo@v1 - env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - with: - command: publish - args: --manifest-path shared/Cargo.toml - - # The crates.io API might be delayed by a few seconds. - # As a result, the newly published `shared` might not be - # published when the binary that depends on it wants to pull it. - # Wait a few seconds to avoid that race-condition. - - name: Wait for cargo publication - uses: juliangruber/sleep-action@v1 - with: - time: 30s - - - name: Publish binary + - name: Publish uses: actions-rs/cargo@v1 env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9f85ced..6665e2c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,8 +23,8 @@ jobs: id: get_release uses: bruceadams/get-release@v1.2.2 env: - GITHUB_TOKEN: ${{ github.token }} - + GITHUB_TOKEN: ${{ github.token }} + linux: runs-on: ubuntu-latest needs: prepare @@ -58,12 +58,10 @@ jobs: sudo apt-get install -y binutils-aarch64-linux-gnu - uses: actions/checkout@v2 - - - uses: actions-rs/toolchain@v1 + - uses: ./.github/actions/cached-toolchain with: - profile: minimal toolchain: stable - default: true + targets: ${{ matrix.target }} - name: Build ${{ matrix.target }} uses: actions-rs/cargo@v1 diff --git a/.gitignore b/.gitignore index 76c9e2d..27a0c23 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,5 @@ tinysearch_engine_bg.wasm.d.ts package.json pkg/ wasm_output/ -demo.html .vscode +tinysearch-engine 
diff --git a/Cargo.lock b/Cargo.lock index 0e8e654..1ca5e0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,23 +2,17 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "anyhow" -version = "1.0.43" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28ae2b3dec75a406790005a200b1bd89785afc02517a00ca99ecfe093ee9e6cf" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" [[package]] name = "argh" -version = "0.1.5" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7317a549bc17c5278d9e72bb6e62c6aa801ac2567048e39ebc1c194249323e" +checksum = "ab257697eb9496bf75526f0217b5ed64636a9cfafa78b8365c71bd283fcef93e" dependencies = [ "argh_derive", "argh_shared", @@ -26,28 +20,21 @@ dependencies = [ [[package]] name = "argh_derive" -version = "0.1.5" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60949c42375351e9442e354434b0cba2ac402c1237edf673cac3a4bf983b8d3c" +checksum = "b382dbd3288e053331f03399e1db106c9fb0d8562ad62cb04859ae926f324fa6" dependencies = [ "argh_shared", - "heck", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "argh_shared" -version = "0.1.5" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a61eb019cb8f415d162cb9f12130ee6bbe9168b7d953c17f4ad049e4051ca00" - -[[package]] -name = "autocfg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "64cb94155d965e3d37ffbbe7cc5b82c3dd79dd33bd48e536f73d2cfb8d85506f" [[package]] name = "bincode" @@ -64,6 +51,18 @@ version = "1.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + [[package]] name = "cfg-if" version = "1.0.0" @@ -71,175 +70,124 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "crc32fast" -version = "1.2.1" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" -dependencies = [ - "cfg-if", -] +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "flate2" -version = "1.0.20" +name = "errno" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3aec53de10fe96d7d8c565eb17f2c687bb5518a2ec453b5b1252964526abe0" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ - "cfg-if", - "crc32fast", + "errno-dragonfly", "libc", - "miniz_oxide", + "windows-sys", ] [[package]] -name = "getopts" -version = "0.2.21" +name = "errno-dragonfly" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" dependencies = [ - "unicode-width", + "cc", + "libc", ] [[package]] -name = "getrandom" -version = "0.1.16" +name = "fastrand" +version = "2.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] +checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" [[package]] -name = "getrandom" -version = "0.2.3" +name = "getopts" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" dependencies = [ - "cfg-if", - "libc", - "wasi 0.10.2+wasi-snapshot-preview1", + "unicode-width", ] [[package]] -name = "heck" -version = "0.3.3" +name = "hashbrown" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" [[package]] -name = "includedir" -version = "0.6.0" +name = "heck" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afd126bd778c00c43a9dc76d1609a0894bf4222088088b2217ccc0ce9e816db7" -dependencies = [ - "flate2", - "phf", -] +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] -name = "includedir_codegen" -version = "0.6.0" +name = "indexmap" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ac1500c9780957c9808c4ec3b94002f35aab01483833f5a8bce7dfb243e3148" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" dependencies = [ - "flate2", - "phf_codegen", - "walkdir", + "equivalent", + "hashbrown", ] [[package]] name = "itoa" -version = "0.4.7" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "libc" -version = "0.2.99" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f823d141fe0a24df1e23b4af4e3c7ba9e5966ec514ea068c93024aa7deb765" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] -name = "log" -version = "0.4.14" +name = "linux-raw-sys" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" [[package]] -name = "memchr" -version = "2.4.0" +name = "log" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] -name = "miniz_oxide" -version = "0.4.4" +name = "memchr" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" -dependencies = [ - "adler", - "autocfg", -] +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "phf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.8.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - 
"phf_generator", "phf_shared", ] -[[package]] -name = "phf_generator" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" -dependencies = [ - "phf_shared", - "rand 0.7.3", -] - [[package]] name = "phf_shared" -version = "0.8.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", ] -[[package]] -name = "ppv-lite86" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" - [[package]] name = "proc-macro2" -version = "1.0.28" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] @@ -248,7 +196,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca36dea94d187597e104a5c8e4b07576a8a45aa5db48a65e12940d3eb7461f55" dependencies = [ - "bitflags", + "bitflags 1.3.2", "getopts", "memchr", "unicase", @@ -256,207 +204,149 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.9" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] [[package]] -name = "rand" -version = "0.7.3" +name = "redox_syscall" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc 0.2.0", - "rand_pcg", + "bitflags 1.3.2", ] [[package]] -name = "rand" -version = "0.8.4" +name = "rustix" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ + "bitflags 2.3.3", + "errno", "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.3", - "rand_hc 0.3.1", + "linux-raw-sys", + "windows-sys", ] [[package]] -name = "rand_chacha" -version = "0.2.2" +name = "rustversion" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom 0.2.3", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "rand_hc" -version = "0.3.1" +name = "ryu" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" -dependencies = [ - "rand_core 0.6.3", -] +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] -name = "rand_pcg" -version = "0.2.1" +name = "serde" +version = "1.0.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b" dependencies = [ - "rand_core 0.5.1", + "serde_derive", ] [[package]] -name = "redox_syscall" -version = "0.2.10" +name = "serde_derive" +version = "1.0.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4" dependencies = [ - "bitflags", + "proc-macro2", + "quote", + "syn 2.0.27", ] [[package]] -name = "remove_dir_all" -version = "0.5.3" +name = "serde_json" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" dependencies = [ - "winapi", + "itoa", + "ryu", + "serde", ] [[package]] -name = "ryu" -version = "1.0.5" +name = "siphasher" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] -name = "same-file" -version = "1.0.6" +name = "strip_markdown" 
+version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +checksum = "32c754386109f9adc8ca62513c51cf81cc2d8502588064103aa8e9f69b4276da" dependencies = [ - "winapi-util", + "log", + "pulldown-cmark", ] [[package]] -name = "serde" -version = "1.0.127" +name = "strum" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "serde_derive", + "strum_macros", ] [[package]] -name = "serde_derive" -version = "1.0.127" +name = "strum_macros" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc" +checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" dependencies = [ + "heck", "proc-macro2", "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127" -dependencies = [ - "itoa", - "ryu", - "serde", + "rustversion", + "syn 2.0.27", ] [[package]] -name = "siphasher" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1" - -[[package]] -name = "strip_markdown" -version = "0.2.0" +name = "syn" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32c754386109f9adc8ca62513c51cf81cc2d8502588064103aa8e9f69b4276da" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ - "log", - "pulldown-cmark", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] name = "syn" -version = "1.0.74" 
+version = "2.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] name = "tempfile" -version = "3.2.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" dependencies = [ "cfg-if", - "libc", - "rand 0.8.4", + "fastrand", "redox_syscall", - "remove_dir_all", - "winapi", + "rustix", + "windows-sys", ] [[package]] @@ -465,26 +355,34 @@ version = "0.7.0" dependencies = [ "anyhow", "argh", - "includedir", - "includedir_codegen", + "bincode", "log", "phf", "serde", "serde_derive", "serde_json", "strip_markdown", + "strum", "tempfile", - "tinysearch-shared", + "toml_edit", "xorf", ] [[package]] -name = "tinysearch-shared" -version = "0.7.0" +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" dependencies = [ - "bincode", - "serde", - "xorf", + "indexmap", + "toml_datetime", + "winnow", ] [[package]] @@ -497,89 +395,103 @@ dependencies = [ ] [[package]] -name = "unicode-segmentation" -version = "1.8.0" +name = "unicode-ident" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = 
"unicode-width" -version = "0.1.8" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] -name = "unicode-xid" -version = "0.2.2" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "version_check" -version = "0.9.3" +name = "windows-sys" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] [[package]] -name = "walkdir" -version = "2.3.2" +name = "windows-targets" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" dependencies = [ - "same-file", - "winapi", - "winapi-util", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" +name = "windows_aarch64_gnullvm" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +name = "windows_aarch64_msvc" +version = "0.48.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] -name = "winapi" -version = "0.3.9" +name = "windows_i686_gnu" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +name = "windows_i686_msvc" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] -name = "winapi-util" -version = "0.1.5" +name = "windows_x86_64_gnu" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "windows_x86_64_gnullvm" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winnow" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" +dependencies = [ + "memchr", +] [[package]] name = "xorf" -version = "0.7.2" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0b223640dfbc22009679ce79f6777ae1db79add62a07345605777f69274a052" +checksum = "57901b00e3f8e14f4d20b8955bf8087ecb545cfe2ed8741c2a2dbc89847a1a29" dependencies = [ - "rand 0.8.4", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 54d2cf3..e9e9a5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,4 @@ [workspace] members = [ - "bin", - "shared", + "tinysearch", ] \ No newline at end of file diff --git a/Makefile b/Makefile index 09fcb1f..585965e 100644 --- a/Makefile +++ b/Makefile @@ -19,11 +19,11 @@ build: ### Compile project .PHONY: install install: ## Install tinysearch - cargo install --force --path bin + cargo install --force --path tinysearch .PHONY: test test: ## Run unit tests - cargo test + cargo test --features=bin .PHONY: run run: ## Run tinysearch with sample input @@ -31,7 +31,7 @@ run: ## Run tinysearch with sample input .PHONY: pack pack: ## Pack tinysearch node module - wasm-pack build bin + wasm-pack build tinysearch wasm-pack pack .PHONY: publish diff --git a/bin/Cargo.toml b/bin/Cargo.toml deleted file mode 100644 index 672e709..0000000 --- a/bin/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -[package] -name = "tinysearch" -authors = ["Matthias Endler "] -version = "0.7.0" -edition = "2021" -description = "A tiny search engine for static websites" -license = "Apache-2.0/MIT" -documentation = "https://github.com/mre/tinysearch/blob/master/README.md" -homepage = "https://github.com/mre/tinysearch" -repository = "https://github.com/mre/tinysearch" - -[dependencies] -tinysearch-shared = { path = "../shared", version = "0.7.0" } - -argh = "0.1.5" -log = "0.4.14" -serde_json = "1.0.66" -anyhow = "1.0.43" -tempfile = "3.2.0" -serde = { version = "1.0.127", features = ["derive"] } -serde_derive = "1.0.127" -phf = 
"0.8.0" -includedir = "0.6.0" -strip_markdown = "0.2.0" -xorf = "0.7.2" - -[build-dependencies] -includedir_codegen = "0.6.0" diff --git a/bin/build.rs b/bin/build.rs deleted file mode 100644 index c0e3378..0000000 --- a/bin/build.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate includedir_codegen; - -use includedir_codegen::Compression; - -fn main() { - includedir_codegen::start("FILES") - .dir("../engine", Compression::Gzip) - .dir("../shared", Compression::Gzip) - .build("engine.rs") - .unwrap(); -} diff --git a/bin/src/main.rs b/bin/src/main.rs deleted file mode 100644 index e7a196e..0000000 --- a/bin/src/main.rs +++ /dev/null @@ -1,160 +0,0 @@ -#[macro_use] -extern crate log; - -mod index; -mod storage; - -use anyhow::{bail, Context, Error, Result}; -use argh::FromArgs; -use std::io::Write; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; -use std::{env, fs}; -use tempfile::tempdir; - -use fs::File; -use index::Posts; - -// The search engine code gets statically included into the binary. -// During indexation (when running tinysearch), this will be compiled to WASM. 
-include!(concat!(env!("OUT_DIR"), "/engine.rs")); - -// Include a bare-bones HTML page that demonstrates how tinysearch is used -static DEMO_HTML: &str = include_str!("../assets/demo.html"); - -fn default_output_dir() -> PathBuf { - fs::create_dir_all("./wasm_output").unwrap(); - PathBuf::from("./wasm_output") -} - -#[derive(FromArgs)] -/// A tiny, static search engine for static websites -struct Opt { - /// show version and exit - #[argh(switch)] - version: bool, - - /// index JSON file to process - #[argh(positional)] - index: Option, - - /// output path for WASM module ("wasm_output" directory by default) - #[argh(option, short = 'p', long = "path", default = "default_output_dir()")] - out_path: PathBuf, - - /// optimize the output using binaryen - #[argh(switch, short = 'o', long = "optimize")] - optimize: bool, -} - -fn unpack_engine(temp_dir: &Path) -> Result<(), Error> { - println!("Starting unpack"); - for file in FILES.file_names() { - println!("Copying {:?}", file); - // This hack removes the "../" prefix that - // gets introduced by including the crates - // from the `bin` parent directory. 
- let filepath = file.trim_start_matches("../"); - let outpath = temp_dir.join(filepath); - if let Some(parent) = outpath.parent() { - debug!("Creating parent dir {:?}", &parent); - fs::create_dir_all(&parent)?; - } - let content = FILES.get(file)?; - let mut outfile = File::create(&outpath)?; - outfile.write_all(&content)?; - } - Ok(()) -} - -fn main() -> Result<(), Error> { - FILES.set_passthrough(env::var_os("PASSTHROUGH").is_some()); - - let opt: Opt = argh::from_env(); - - if opt.version { - println!("tinysearch {}", env!("CARGO_PKG_VERSION")); - std::process::exit(0); - } - - let out_path = opt.out_path.canonicalize()?; - - let index = opt.index.context("No index file specified")?; - let posts: Posts = index::read(fs::read_to_string(index)?)?; - trace!("Generating storage from posts: {:#?}", posts); - storage::write(posts)?; - - let temp_dir = tempdir()?; - println!( - "Unpacking tinysearch WASM engine into temporary directory {:?}", - temp_dir.path() - ); - unpack_engine(temp_dir.path())?; - debug!("Crate content extracted to {:?}/", &temp_dir); - - let engine_dir = temp_dir.path().join("engine"); - if !engine_dir.exists() { - fs::create_dir_all(&engine_dir)?; - } - if !engine_dir.exists() { - for path in fs::read_dir(out_path)? { - println!("Name: {}", path.unwrap().path().display()) - } - bail!( - "Engine directory could not be created at {}", - engine_dir.display() - ); - } - - println!("Copying index into crate"); - fs::copy("storage", engine_dir.join("storage"))?; - - println!("Compiling WASM module using wasm-pack"); - wasm_pack(&temp_dir.path().join("engine"), &out_path)?; - - if opt.optimize { - optimize(&out_path)?; - } - - fs::write(&out_path.join("demo.html"), DEMO_HTML)?; - - println!("All done! 
Open the output folder with a web server to try the demo."); - Ok(()) -} - -fn wasm_pack(in_dir: &Path, out_dir: &Path) -> Result { - run_output( - Command::new("wasm-pack") - .arg("build") - .arg(in_dir) - .arg("--target") - .arg("web") - .arg("--release") - .arg("--out-dir") - .arg(out_dir), - ) -} - -fn optimize(dir: &Path) -> Result { - run_output( - Command::new("wasm-opt") - .current_dir(dir) - .arg("-Oz") - .arg("-o") - .arg("tinysearch_engine_bg.wasm") - .arg("tinysearch_engine_bg.wasm"), - ) -} - -pub fn run_output(cmd: &mut Command) -> Result { - log::debug!("running {:?}", cmd); - let output = cmd - .stderr(Stdio::inherit()) - .output() - .with_context(|| format!("failed to run {:?}", cmd))?; - - if !output.status.success() { - anyhow::bail!("failed to execute {:?}\nstatus: {}", cmd, output.status) - } - Ok(String::from_utf8_lossy(&output.stdout).into_owned()) -} diff --git a/bin/assets/demo.html b/demo.html similarity index 100% rename from bin/assets/demo.html rename to demo.html diff --git a/engine/.gitignore b/engine/.gitignore deleted file mode 100644 index 2f88dba..0000000 --- a/engine/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/target -**/*.rs.bk -Cargo.lock \ No newline at end of file diff --git a/engine/Cargo.lock b/engine/Cargo.lock new file mode 100644 index 0000000..a2f3dd3 --- /dev/null +++ b/engine/Cargo.lock @@ -0,0 +1,260 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "memory_units" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8452105ba047068f40ff7093dd1d9da90898e63dd61736462e9cdda6a90ad3c3" + +[[package]] +name = "once_cell" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" + +[[package]] +name = "proc-macro2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "serde" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9875c23cf305cd1fd7eb77234cbb705f21ea6a72c637a5c6db5fe4b8e7f008" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc0db5cb2556c0e558887d9bbdcf6ac4471e83ff66cf696e5419024d1606276" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecb2e6da8ee5eb9a61068762a32fa9619cc591ceb055b3687f4cd4051ec2e06b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tinysearch-engine" +version = "0.6.4" +dependencies = [ + "once_cell", + "tinysearch-shared", + "wasm-bindgen", + "wee_alloc", + "xorf", +] + +[[package]] +name = "tinysearch-shared" +version = "0.6.4" +dependencies = [ + 
"bincode", + "serde", + "xorf", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "wasm-bindgen" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +dependencies = [ + "cfg-if 1.0.0", + "serde", + "serde_json", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" + +[[package]] +name = "wee_alloc" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb3b5a6b2bb17cb6ad44a2e68a43e8d2722c997da10e928665c72ec6c0a0b8e" +dependencies = [ + "cfg-if 0.1.10", + "libc", + "memory_units", + "winapi", +] + +[[package]] +name = "winapi" +version = 
"0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xorf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7afb3a52aef0211e557044386369919b033004fdcf4c56c5017710f97fa9aa3c" +dependencies = [ + "serde", +] diff --git a/engine/src/lib.rs b/engine/src/lib.rs deleted file mode 100644 index a0905eb..0000000 --- a/engine/src/lib.rs +++ /dev/null @@ -1,55 +0,0 @@ -use once_cell::sync::Lazy; -use wasm_bindgen::prelude::*; -use xorf::{HashProxy, Xor8}; - -use std::cmp::Reverse; -use std::collections::hash_map::DefaultHasher; - -use tinysearch_shared::{Filters, PostId, Score, Storage}; -pub type Filter = HashProxy; - -#[global_allocator] -static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT; - -const TITLE_WEIGHT: usize = 3; - -static FILTERS: Lazy = Lazy::new(|| { - let bytes = include_bytes!("../storage"); - Storage::from_bytes(bytes).unwrap().filters -}); - -// Wrapper around filter score, that also scores the post title -// Post title score has a higher weight than post body -fn score(title: &String, search_terms: &Vec, filter: &Filter) -> usize { - let title_terms: Vec = tokenize(&title); - let title_score: usize = search_terms - .iter() - .filter(|term| title_terms.contains(&term)) - .count(); - TITLE_WEIGHT * title_score + filter.score(search_terms) -} - -fn tokenize(s: &str) 
-> Vec { - s.to_lowercase() - .split_whitespace() - .filter(|&t| !t.trim().is_empty()) - .map(String::from) - .collect() -} - -#[wasm_bindgen] -pub fn search(query: String, num_results: usize) -> JsValue { - let search_terms: Vec = tokenize(&query); - - let mut matches: Vec<(&PostId, usize)> = FILTERS - .iter() - .map(|(post_id, filter)| (post_id, score(&post_id.0, &search_terms, &filter))) - .filter(|(_post_id, score)| *score > 0) - .collect(); - - matches.sort_by_key(|k| Reverse(k.1)); - - let results: Vec<&PostId> = matches.into_iter().take(num_results).map(|p| p.0).collect(); - - JsValue::from_serde(&results).unwrap() -} diff --git a/examples/yew-example-crate/.gitignore b/examples/yew-example-crate/.gitignore new file mode 100644 index 0000000..936c7e2 --- /dev/null +++ b/examples/yew-example-crate/.gitignore @@ -0,0 +1,3 @@ +example-search +target +dist diff --git a/examples/yew-example-crate/Cargo.lock b/examples/yew-example-crate/Cargo.lock new file mode 100644 index 0000000..f14f703 --- /dev/null +++ b/examples/yew-example-crate/Cargo.lock @@ -0,0 +1,672 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "anyhow" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" + +[[package]] +name = "anymap" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33954243bd79057c2de7338850b85983a44588021f8a5fee574a8888c6de4344" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "boolinator" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfa8873f51c92e232f9bac4065cddef41b714152812bfc5f7672ba16d6ef8cd9" + +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg-match" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8100e46ff92eb85bf6dc2930c73f2a4f7176393c84a9446b3d501e1b354e7b34" + +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen", +] + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + +[[package]] +name = "example_search" +version = "0.7.0" +dependencies = [ + "once_cell", + "serde-wasm-bindgen", + "tinysearch", + "wasm-bindgen", + "wee_alloc", + "xorf", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "gloo" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ce6f2dfa9f57f15b848efa2aade5e1850dc72986b87a2b0752d44ca08f4967" +dependencies = [ + "gloo-console-timer", + "gloo-events", + "gloo-file", + "gloo-timers", +] + +[[package]] +name = "gloo-console-timer" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b48675544b29ac03402c6dffc31a912f716e38d19f7e74b78b7e900ec3c941ea" +dependencies = [ + "web-sys", +] + +[[package]] +name = "gloo-events" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b107f8abed8105e4182de63845afcc7b69c098b7852a813ea7462a320992fc" +dependencies = [ + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "gloo-file" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f9fecfe46b5dc3cc46f58e98ba580cc714f2c93860796d002eb3527a465ef49" +dependencies = [ + "gloo-events", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "gloo-timers" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "indexmap" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if 1.0.0", + "ryu", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memory_units" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8452105ba047068f40ff7093dd1d9da90898e63dd61736462e9cdda6a90ad3c3" + +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" + +[[package]] +name = "semver" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" + +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde-wasm-bindgen" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b4c031cd0d9014307d82b8abf653c0290fbdaeb4c02d00c63cf52f728628bf" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + +[[package]] +name = "serde_derive" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +dependencies = [ + "autocfg", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinysearch" +version = "0.7.0" +dependencies = [ + "bincode", + "serde", + "xorf", +] + +[[package]] +name = "unicode-ident" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "775c11906edafc97bc378816b94585fbd9a054eabaf86fdd0ced94af449efab7" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if 1.0.0", + "serde", + "serde_json", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wee_alloc" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dbb3b5a6b2bb17cb6ad44a2e68a43e8d2722c997da10e928665c72ec6c0a0b8e" +dependencies = [ + "cfg-if 0.1.10", + "libc", + "memory_units", + "winapi", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xorf" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0b223640dfbc22009679ce79f6777ae1db79add62a07345605777f69274a052" +dependencies = [ + "serde", +] + +[[package]] +name = "ybc" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c409d21870c31cc3beb3b5ba8447306ecfac198876fa73bdce861b23299121" +dependencies = [ + "derive_more", + "web-sys", + "yew", + "yew-router", + "yewtil", +] + +[[package]] +name = "yew" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4d5154faef86dddd2eb333d4755ea5643787d20aca683e58759b0e53351409f" +dependencies = [ + "anyhow", + "anymap", + "bincode", + "cfg-if 1.0.0", + "cfg-match", + "console_error_panic_hook", + "gloo", + "http", + "indexmap", + "js-sys", + "log", + "ryu", + "serde", + "serde_json", + "slab", + "thiserror", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "yew-macro", +] + +[[package]] +name = "yew-example-crate" +version = "0.1.0" +dependencies = [ + "console_error_panic_hook", + 
"example_search", + "wasm-bindgen", + "ybc", + "yew", +] + +[[package]] +name = "yew-macro" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6e23bfe3dc3933fbe9592d149c9985f3047d08c637a884b9344c21e56e092ef" +dependencies = [ + "boolinator", + "lazy_static", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "yew-router" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27666236d9597eac9be560e841e415e20ba67020bc8cd081076be178e159c8bc" +dependencies = [ + "cfg-if 1.0.0", + "cfg-match", + "gloo", + "js-sys", + "log", + "nom", + "serde", + "serde_json", + "wasm-bindgen", + "web-sys", + "yew", + "yew-router-macro", + "yew-router-route-parser", +] + +[[package]] +name = "yew-router-macro" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c0ace2924b7a175e2d1c0e62ee7022a5ad840040dcd52414ce5f410ab322dba" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "yew-router-route-parser", +] + +[[package]] +name = "yew-router-route-parser" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de4a67208fb46b900af18a7397938b01f379dfc18da34799cfa8347eec715697" +dependencies = [ + "nom", +] + +[[package]] +name = "yewtil" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8543663ac49cd613df079282a1d8bdbdebdad6e02bac229f870fd4237b5d9aaa" +dependencies = [ + "log", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "yew", +] diff --git a/examples/yew-example-crate/Cargo.toml b/examples/yew-example-crate/Cargo.toml new file mode 100644 index 0000000..adfd90e --- /dev/null +++ b/examples/yew-example-crate/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "yew-example-crate" +version = "0.1.0" +edition = "2021" + + +[dependencies] +console_error_panic_hook = "0.1" +wasm-bindgen = "0.2" +ybc = "0.2" +yew = "0.18" +example_search = 
{path = "./example-search", features=[]} + + +[workspace] +members = [] \ No newline at end of file diff --git a/examples/yew-example-crate/Trunk.toml b/examples/yew-example-crate/Trunk.toml new file mode 100644 index 0000000..0613947 --- /dev/null +++ b/examples/yew-example-crate/Trunk.toml @@ -0,0 +1,13 @@ +[build] +target = "index.html" +dist = "dist" + +[[hooks]] +# This hook example shows all the current available fields. It will execute the equivalent of +# typing "echo Hello Trunk!" right at the start of the build process (even before the HTML file +# is read). By default, the command is spawned directly and no shell is used. +stage = "pre_build" +command = "sh" +command_arguments = [ + "-c", + "cd ../../ && cargo run --features=bin --release -- -m crate -p examples/yew-example-crate/example-search/ --crate-name example_search --engine-version 'path=\"../../../tinysearch\"' --non-top-level-crate fixtures/index.json"] diff --git a/examples/yew-example-crate/index.html b/examples/yew-example-crate/index.html new file mode 100644 index 0000000..f870492 --- /dev/null +++ b/examples/yew-example-crate/index.html @@ -0,0 +1,15 @@ + + + + + + Trunk | Yew | YBC + + + + + + + + + diff --git a/examples/yew-example-crate/src/index.scss b/examples/yew-example-crate/src/index.scss new file mode 100644 index 0000000..35a1942 --- /dev/null +++ b/examples/yew-example-crate/src/index.scss @@ -0,0 +1,3 @@ +@charset "utf-8"; + +html {} diff --git a/examples/yew-example-crate/src/main.rs b/examples/yew-example-crate/src/main.rs new file mode 100644 index 0000000..748f419 --- /dev/null +++ b/examples/yew-example-crate/src/main.rs @@ -0,0 +1,110 @@ +#![recursion_limit = "1024"] + +use console_error_panic_hook::set_once as set_panic_hook; +use ybc::TileCtx::{Child, Parent}; +use ybc::InputType::Text; +use yew::prelude::*; + +use example_search::search_local; + +struct App{ + value: String, + posts: Vec, + link: ComponentLink +} + +enum Msg { + SearchChanged(String) +} + +impl App{ 
+ fn render_post(s: &String) -> yew::Html{ + html!{ + + + {s.clone()} + + + } + } +} + +impl Component for App { + type Message = Msg; + type Properties = (); + + fn create(_: Self::Properties, link: ComponentLink) -> Self { + Self{ + value: String::default(), + posts: Vec::new(), + link: link + } + } + + fn update(&mut self, msg: Self::Message) -> bool { + match msg { + Msg::SearchChanged(s) => { + if s != self.value{ + self.value = s; + let posts = search_local(self.value.clone(), 5); + self.posts = posts.iter().map(|x|x.0.clone()).collect(); + true + }else{ + false + } + } + } + } + + fn change(&mut self, _: Self::Properties) -> bool { + false + } + + fn view(&self) -> Html { + html! { + <> + + {"Tinysearch | Trunk | Yew | YBC | demo"} + + } + navstart=html!{} + navend=html!{ + <> + + + + + } + /> + + + + + { + for self.posts.iter().map(App::render_post) + } + + + }> + + + } + } +} + +fn main() { + set_panic_hook(); + + yew::start_app::(); +} \ No newline at end of file diff --git a/examples/yew-example-storage/.gitignore b/examples/yew-example-storage/.gitignore new file mode 100644 index 0000000..936c7e2 --- /dev/null +++ b/examples/yew-example-storage/.gitignore @@ -0,0 +1,3 @@ +example-search +target +dist diff --git a/examples/yew-example-storage/Cargo.lock b/examples/yew-example-storage/Cargo.lock new file mode 100644 index 0000000..8d62f2c --- /dev/null +++ b/examples/yew-example-storage/Cargo.lock @@ -0,0 +1,598 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "anyhow" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" + +[[package]] +name = "anymap" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33954243bd79057c2de7338850b85983a44588021f8a5fee574a8888c6de4344" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "boolinator" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfa8873f51c92e232f9bac4065cddef41b714152812bfc5f7672ba16d6ef8cd9" + +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg-match" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8100e46ff92eb85bf6dc2930c73f2a4f7176393c84a9446b3d501e1b354e7b34" + +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "gloo" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ce6f2dfa9f57f15b848efa2aade5e1850dc72986b87a2b0752d44ca08f4967" +dependencies = [ + "gloo-console-timer", + "gloo-events", + "gloo-file", + "gloo-timers", +] + +[[package]] +name = "gloo-console-timer" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b48675544b29ac03402c6dffc31a912f716e38d19f7e74b78b7e900ec3c941ea" +dependencies = [ + "web-sys", +] + +[[package]] +name = "gloo-events" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b107f8abed8105e4182de63845afcc7b69c098b7852a813ea7462a320992fc" +dependencies = [ + "wasm-bindgen", + 
"web-sys", +] + +[[package]] +name = "gloo-file" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f9fecfe46b5dc3cc46f58e98ba580cc714f2c93860796d002eb3527a465ef49" +dependencies = [ + "gloo-events", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "gloo-timers" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "indexmap" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" + +[[package]] +name = "semver" 
+version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" + +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +dependencies = [ + "autocfg", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinysearch" +version = "0.7.0" +dependencies = [ + "bincode", + "serde", + "xorf", +] + +[[package]] +name = "unicode-ident" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "775c11906edafc97bc378816b94585fbd9a054eabaf86fdd0ced94af449efab7" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if", + "serde", + "serde_json", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "xorf" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0b223640dfbc22009679ce79f6777ae1db79add62a07345605777f69274a052" +dependencies = [ + "serde", +] + +[[package]] +name = "ybc" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c409d21870c31cc3beb3b5ba8447306ecfac198876fa73bdce861b23299121" +dependencies = [ + "derive_more", + "web-sys", + "yew", + "yew-router", + "yewtil", +] + +[[package]] +name = "yew" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4d5154faef86dddd2eb333d4755ea5643787d20aca683e58759b0e53351409f" +dependencies = [ + "anyhow", + "anymap", + "bincode", + "cfg-if", + "cfg-match", + "console_error_panic_hook", + "gloo", + "http", + "indexmap", + "js-sys", + "log", + "ryu", + "serde", + "serde_json", + "slab", + "thiserror", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "yew-macro", +] + +[[package]] +name = "yew-example-storage" +version = "0.1.0" +dependencies = [ + "console_error_panic_hook", + "once_cell", + "tinysearch", + "wasm-bindgen", + "ybc", + "yew", +] + +[[package]] +name = "yew-macro" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6e23bfe3dc3933fbe9592d149c9985f3047d08c637a884b9344c21e56e092ef" +dependencies = [ + "boolinator", + "lazy_static", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "yew-router" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27666236d9597eac9be560e841e415e20ba67020bc8cd081076be178e159c8bc" +dependencies = [ + "cfg-if", + "cfg-match", + "gloo", + "js-sys", + "log", + "nom", + "serde", + "serde_json", + "wasm-bindgen", + "web-sys", + "yew", + "yew-router-macro", + "yew-router-route-parser", +] + +[[package]] +name = "yew-router-macro" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c0ace2924b7a175e2d1c0e62ee7022a5ad840040dcd52414ce5f410ab322dba" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "yew-router-route-parser", +] + +[[package]] +name = "yew-router-route-parser" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de4a67208fb46b900af18a7397938b01f379dfc18da34799cfa8347eec715697" +dependencies = [ + "nom", +] + +[[package]] +name = "yewtil" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8543663ac49cd613df079282a1d8bdbdebdad6e02bac229f870fd4237b5d9aaa" +dependencies = [ + "log", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "yew", +] diff --git a/examples/yew-example-storage/Cargo.toml b/examples/yew-example-storage/Cargo.toml new file mode 100644 index 0000000..b33550a --- /dev/null +++ b/examples/yew-example-storage/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "yew-example-storage" +version = "0.1.0" +edition = "2021" + + +[dependencies] +console_error_panic_hook = "0.1" +wasm-bindgen = "0.2" +ybc = "0.2" +yew = "0.18" +once_cell = "*" +tinysearch = {path="../../tinysearch"} + +[workspace] +members = [] \ No newline at end of file diff --git a/examples/yew-example-storage/Trunk.toml 
b/examples/yew-example-storage/Trunk.toml new file mode 100644 index 0000000..62d4bb9 --- /dev/null +++ b/examples/yew-example-storage/Trunk.toml @@ -0,0 +1,10 @@ +[build] +target = "index.html" +dist = "dist" + +[[hooks]] +stage = "pre_build" +command = "sh" +command_arguments = [ + "-c", + "cd ../../ && cargo run --features=bin --release -- -m storage -p examples/yew-example-storage/src/ fixtures/index.json"] diff --git a/examples/yew-example-storage/index.html b/examples/yew-example-storage/index.html new file mode 100644 index 0000000..be37e29 --- /dev/null +++ b/examples/yew-example-storage/index.html @@ -0,0 +1,15 @@ + + + + + + Trunk | Yew | YBC + + + + + + + + + diff --git a/examples/yew-example-storage/src/index.scss b/examples/yew-example-storage/src/index.scss new file mode 100644 index 0000000..35a1942 --- /dev/null +++ b/examples/yew-example-storage/src/index.scss @@ -0,0 +1,3 @@ +@charset "utf-8"; + +html {} diff --git a/examples/yew-example-storage/src/main.rs b/examples/yew-example-storage/src/main.rs new file mode 100644 index 0000000..d3e5ac7 --- /dev/null +++ b/examples/yew-example-storage/src/main.rs @@ -0,0 +1,115 @@ +#![recursion_limit = "1024"] + +use console_error_panic_hook::set_once as set_panic_hook; +use once_cell::sync::Lazy; +use ybc::TileCtx::{Child, Parent}; +use ybc::InputType::Text; +use yew::prelude::*; +use tinysearch::{Filters, Storage,search}; + +struct App{ + value: String, + posts: Vec, + link: ComponentLink +} + +enum Msg { + SearchChanged(String) +} + +static FILTERS: Lazy = Lazy::new(|| { + let bytes = include_bytes!("storage"); + Storage::from_bytes(bytes).unwrap().filters +}); + +impl App{ + fn render_post(s: &String) -> yew::Html{ + html!{ + + + {s.clone()} + + + } + } +} + +impl Component for App { + type Message = Msg; + type Properties = (); + + fn create(_: Self::Properties, link: ComponentLink) -> Self { + Self{ + value: String::default(), + posts: Vec::new(), + link: link + } + } + + fn update(&mut self, msg: 
Self::Message) -> bool { + match msg { + Msg::SearchChanged(s) => { + if s != self.value{ + self.value = s; + let posts = search(&FILTERS, self.value.clone(), 5); + self.posts = posts.iter().map(|x|x.0.clone()).collect(); + true + }else{ + false + } + } + } + } + + fn change(&mut self, _: Self::Properties) -> bool { + false + } + + fn view(&self) -> Html { + html! { + <> + + {"Tinysearch | Trunk | Yew | YBC | demo"} + + } + navstart=html!{} + navend=html!{ + <> + + + + + } + /> + + + + + { + for self.posts.iter().map(App::render_post) + } + + + }> + + + } + } +} + +fn main() { + set_panic_hook(); + + yew::start_app::(); +} \ No newline at end of file diff --git a/report.json b/report.json new file mode 100644 index 0000000..940d8ff --- /dev/null +++ b/report.json @@ -0,0 +1,26 @@ +{ + "detailed_stats": false, + "total": 40, + "successful": 39, + "unknown": 0, + "unsupported": 0, + "timeouts": 0, + "redirects": 0, + "excludes": 0, + "errors": 1, + "cached": 0, + "success_map": {}, + "fail_map": { + "./README.md": [ + { + "url": "http://0.0.0.0:8000/demo.html", + "status": { + "text": "Failed: Network error", + "code": 404 + } + } + ] + }, + "suggestion_map": {}, + "excluded_map": {} +} \ No newline at end of file diff --git a/shared/.gitignore b/shared/.gitignore deleted file mode 100644 index 2f88dba..0000000 --- a/shared/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/target -**/*.rs.bk -Cargo.lock \ No newline at end of file diff --git a/shared/Cargo.toml b/shared/Cargo.toml deleted file mode 100644 index 42cc843..0000000 --- a/shared/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "tinysearch-shared" -version = "0.7.0" -authors = ["Matthias Endler "] -edition = "2021" -description = "Shared libraries for tinysearch - a tiny search engine for static websites" -license = "Apache-2.0/MIT" -documentation = "https://github.com/mre/tinysearch/blob/master/README.md" -homepage = "https://github.com/mre/tinysearch" -repository = 
"https://github.com/mre/tinysearch" - -[dependencies] -bincode = "1.3.3" - -[dependencies.serde] -version = "1.0.127" -features = ["derive"] - -[dependencies.xorf] -version = "0.7.2" -default-features = false -features = ["serde"] diff --git a/shared/src/lib.rs b/shared/src/lib.rs deleted file mode 100644 index 86afffd..0000000 --- a/shared/src/lib.rs +++ /dev/null @@ -1,48 +0,0 @@ -use bincode::Error as BincodeError; -use serde::{Deserialize, Serialize}; -use std::convert::From; -use xorf::{Filter, HashProxy, Xor8}; - -use std::collections::hash_map::DefaultHasher; - -type Title = String; -type Url = String; -type Meta = Option; -pub type PostId = (Title, Url, Meta); -pub type PostFilter = (PostId, HashProxy); -pub type Filters = Vec; - -#[derive(Serialize, Deserialize)] -pub struct Storage { - pub filters: Filters, -} - -impl From for Storage { - fn from(filters: Filters) -> Self { - Storage { filters } - } -} - -pub trait Score { - fn score(&self, terms: &[String]) -> usize; -} - -// the score denotes the number of terms from the query that are contained in the -// current filter -impl Score for HashProxy { - fn score(&self, terms: &[String]) -> usize { - terms.iter().filter(|term| self.contains(term)).count() - } -} - -impl Storage { - pub fn to_bytes(&self) -> Result, BincodeError> { - let encoded: Vec = bincode::serialize(&self)?; - Ok(encoded) - } - - pub fn from_bytes(bytes: &[u8]) -> Result { - let decoded: Filters = bincode::deserialize(bytes)?; - Ok(Storage { filters: decoded }) - } -} diff --git a/testscript.sh b/testscript.sh new file mode 100755 index 0000000..976e2f1 --- /dev/null +++ b/testscript.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# + +for example in `ls -1 examples`; do + (cd "examples/$example" && trunk build index.html) || exit 1; +done diff --git a/bin/.gitignore b/tinysearch/.gitignore similarity index 100% rename from bin/.gitignore rename to tinysearch/.gitignore diff --git a/tinysearch/Cargo.toml b/tinysearch/Cargo.toml new file mode 
100644 index 0000000..f2b254d --- /dev/null +++ b/tinysearch/Cargo.toml @@ -0,0 +1,58 @@ +[package] +name = "tinysearch" +authors = ["Matthias Endler "] +version = "0.7.0" +edition = "2021" +description = "A tiny search engine for static websites" +license = "Apache-2.0/MIT" +documentation = "https://github.com/mre/tinysearch/blob/master/README.md" +homepage = "https://github.com/mre/tinysearch" +repository = "https://github.com/mre/tinysearch" +include = ["/src/assets"] +[lib] + +[[bin]] +name = "tinysearch" +required-features = ["bin"] + +[dependencies] +bincode = "1.3.3" + + +argh = { version = "0.1.10", optional = true } +log = { version = "0.4.19", optional = true } +serde_json = { version = "1.0.103", optional = true } +anyhow = { version = "1.0.72", optional = true } +tempfile = { version = "3.7.0", optional = true } + +serde_derive = { version = "1.0.175", optional = true } +phf = { version = "0.11.2", optional = true } +strip_markdown = { version = "0.2.0", optional = true } +strum = { version = "0.25.0", features = ["derive"], optional = true } +toml_edit = { version = "0.19.14", optional = true } + + +[dependencies.serde] +version = "1.0.175" +features = ["derive"] + +[dependencies.xorf] +version = "0.8.1" +default-features = false +features = ["serde"] + + +[features] +default = [] +bin = [ + "argh", + "log", + "serde_json", + "anyhow", + "tempfile", + "serde_derive", + "phf", + "strip_markdown", + "strum", + "toml_edit", +] diff --git a/engine/Cargo.toml b/tinysearch/assets/crate/Cargo.toml similarity index 59% rename from engine/Cargo.toml rename to tinysearch/assets/crate/Cargo.toml index 54ae348..b610f75 100644 --- a/engine/Cargo.toml +++ b/tinysearch/assets/crate/Cargo.toml @@ -1,5 +1,7 @@ +# WARNING: this file is autogenerated! 
+ [package] -name = "tinysearch-engine" +name = "THIS_VALUE_SHOULD_BE_FILLED" authors = ["Matthias Endler "] version = "0.7.0" edition = "2021" @@ -7,26 +9,28 @@ description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" documentation = "https://github.com/mre/tinysearch/blob/master/README.md" homepage = "https://github.com/mre/tinysearch" +wasm-opt = false [lib] crate-type = ["cdylib"] +[features] +default=["bind"] +bind=["wee_alloc", "wasm-bindgen", "serde-wasm-bindgen"] + [dependencies] -wee_alloc = "0.4.5" once_cell = "1.8.0" +tinysearch = "THIS_VALUE_SHOULD_BE_FILLED" -[dependencies.tinysearch-shared] -path = "../shared" -version = "0.7.0" + +wee_alloc = {version = "0.4.5", optional = true} +wasm-bindgen= {version = "0.2.75", optional = true} +serde-wasm-bindgen = {version = "0.4", optional = true} [dependencies.xorf] version = "0.7.2" default-features = false -[dependencies.wasm-bindgen] -version = "0.2.75" -features = ["serde-serialize"] - [workspace] members = [] @@ -34,3 +38,4 @@ members = [] opt-level = 's' # Optimize for size. 
lto = true codegen-units = 1 + diff --git a/tinysearch/assets/crate/src/lib.rs b/tinysearch/assets/crate/src/lib.rs new file mode 100644 index 0000000..3556be5 --- /dev/null +++ b/tinysearch/assets/crate/src/lib.rs @@ -0,0 +1,27 @@ +use once_cell::sync::Lazy; + +#[cfg(feature = "bind")] +use serde_wasm_bindgen; +#[cfg(feature = "bind")] +use wasm_bindgen::prelude::*; + +use tinysearch::{search as base_search, Filters, PostId, Storage}; + +#[cfg(feature = "bind")] +#[global_allocator] +static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT; + +static FILTERS: Lazy = Lazy::new(|| { + let bytes = include_bytes!("storage"); + Storage::from_bytes(bytes).unwrap().filters +}); + +pub fn search_local(query: String, num_results: usize) -> Vec<&'static PostId> { + base_search(&FILTERS, query, num_results) +} + +#[cfg(feature = "bind")] +#[wasm_bindgen] +pub fn search(query: String, num_results: usize) -> JsValue { + serde_wasm_bindgen::to_value(&search_local(query, num_results)).unwrap() +} diff --git a/tinysearch/assets/demo.html b/tinysearch/assets/demo.html new file mode 100644 index 0000000..5fae875 --- /dev/null +++ b/tinysearch/assets/demo.html @@ -0,0 +1,72 @@ + + + + + + + + + + + + +

Search

+ +

Results

+
    +
+ + + \ No newline at end of file diff --git a/bin/assets/stopwords b/tinysearch/assets/stopwords similarity index 100% rename from bin/assets/stopwords rename to tinysearch/assets/stopwords diff --git a/tinysearch/src/bin/tinysearch.rs b/tinysearch/src/bin/tinysearch.rs new file mode 100644 index 0000000..b5c7304 --- /dev/null +++ b/tinysearch/src/bin/tinysearch.rs @@ -0,0 +1,425 @@ +#![cfg(feature = "bin")] +#[macro_use] +extern crate log; + +mod utils; +use utils::assets; +use utils::index; +use utils::storage; + +use anyhow::{bail, Context}; +pub use anyhow::{Error, Result}; +use argh::FromArgs; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::str::FromStr; +use std::{env, fs}; +use tempfile::TempDir; +use toml_edit::{value, Document}; + +use index::Posts; +use strum::{EnumString, IntoStaticStr}; + +fn ensure_exists(path: PathBuf) -> Result { + if !path.exists() { + fs::create_dir_all(&path)?; + } + let path = path.canonicalize()?; + if !path.exists() { + for path in fs::read_dir(&path)? 
{ + println!("Name: {}", path.unwrap().path().display()) + } + bail!("Directory could not be created at {}", &path.display()); + } + Ok(path) +} + +#[derive(Debug)] +enum DirOrTemp { + Path(PathBuf), + Temp(TempDir), +} + +impl DirOrTemp { + pub fn path(&self) -> PathBuf { + match self { + DirOrTemp::Path(p) => p.clone(), + DirOrTemp::Temp(p) => p.path().to_path_buf(), + } + } +} + +impl Default for DirOrTemp { + fn default() -> Self { + Self::Temp(TempDir::new().expect("Failed to create a temporary directory")) + } +} + +impl FromStr for DirOrTemp { + type Err = ::Err; + + fn from_str(s: &str) -> std::result::Result { + Ok(DirOrTemp::Path(PathBuf::from_str(s)?)) + } +} + +#[derive(IntoStaticStr, EnumString, Clone)] +#[strum(serialize_all = "snake_case")] +enum OutputMode { + Search, + Storage, + Crate, + Wasm, +} + +fn parse_engine_version(str: &str) -> Result { + let doc = str.parse::().map_err(|e| e.to_string())?; + Ok(doc.as_table().clone()) +} + +#[derive(FromArgs, Clone)] +/// A tiny, static search engine for static websites +/// +/// +/// It can run in several modes (-m/--mode argument). +/// Valid modes are: +/// **search** - runs search engine on generated storage data, +/// **storage** - generates storage data for posts, +/// **crate** - creates a Rust crate with storage data, +/// **wasm** - creates a crate and generates a loadable js/wasm script. 
+/// +struct Opt { + /// show version and exit + #[argh(switch)] + version: bool, + + /// output mode + #[argh(option, short = 'm', long = "mode", default = "OutputMode::Wasm")] + output_mode: OutputMode, + + /// term to search in posts (only for search mode) + #[argh( + option, + short = 'S', + long = "search-term", + default = "String::default()" + )] + search_term: String, + + /// number of posts to show in search results (only for search mode) + #[argh(option, short = 'N', long = "num-searches", default = "5")] + num_searches: usize, + + /// input file to process (either JSON with posts for code generation or storage for inference) + #[argh(positional)] + input_file: Option, + + /// output path for WASM module ("wasm_output" directory by default) + #[argh( + option, + short = 'p', + long = "path", + default = "\"./wasm_output\".into()" + )] + out_path: PathBuf, + + /// where to put generated crate + /// * In wasm mode crate is generated: + /// * If this option is specified: in this path. + /// * If this option is omitted: in a temp directory removed after run. + /// * In crate mode this is ignored in favor of -p/--path. + #[argh(option, long = "crate-path")] + crate_path: Option, + + /// this version will be used in Cargo.toml for the generated crate + /// (only used in wasm, crate modes). This should be a valid TOML table definition. + /// Default is 'version="env!("CARGO_PKG_VERSION")"'. 
If you have a local version of + /// tinysearch, you can specify 'path="/path/to/tinysearch"' + #[argh( + option, + short = 'e', + long = "engine-version", + from_str_fn(parse_engine_version), + default = "format!(\"version=\\\"{}\\\"\", env!(\"CARGO_PKG_VERSION\")).parse::().unwrap().as_table().clone()" + )] + engine_version: toml_edit::Table, + + /// this name will be used in Cargo.toml for the generated crate (only used in wasm and crate modes) + #[argh(option, long = "crate-name", default = "\"tinysearch-engine\".into()")] + crate_name: String, + + /// removes all top-level configs from Cargo.toml of generated crate and makes it locally importable (only makes sense in crate mode) + #[argh(switch, long = "non-top-level-crate")] + non_top_level_crate: bool, + + /// optimize the output using binaryen (only valid in wasm mode) + #[argh(switch, short = 'o', long = "optimize")] + optimize: bool, +} + +trait Stage: Sized { + fn from_opt(opt: &Opt) -> Result; + + fn build(&self) -> Result<(), Error>; +} + +#[derive(Default)] +struct Search { + storage_file: PathBuf, + term: String, + num_searches: usize, +} + +impl Stage for Search { + fn from_opt(opt: &Opt) -> Result { + let input = opt.input_file.clone().context("Missing input file")?; + let term = opt.search_term.clone(); + Ok(Self { + storage_file: input + .canonicalize() + .with_context(|| format!("Failed to find file: {}", input.display()))?, + term, + num_searches: opt.num_searches, + }) + } + + fn build(&self) -> Result<(), Error> { + use tinysearch::{search as base_search, Storage}; + let bytes = fs::read(&self.storage_file).with_context(|| { + format!("Failed to read input file: {}", self.storage_file.display()) + })?; + let filters = Storage::from_bytes(&bytes)?.filters; + let results = base_search(&filters, self.term.clone(), self.num_searches); + for result in results { + println!( + "Title: {}, Url: {}, Meta: {:?}", + result.0, result.1, result.2 + ); + } + Ok(()) + } +} + +#[derive(Default)] +struct 
Storage { + posts_index: PathBuf, + out_path: PathBuf, +} + +impl Stage for Storage { + fn from_opt(opt: &Opt) -> Result { + Ok(Self { + posts_index: opt.input_file.clone().context("No input file")?, + out_path: ensure_exists(opt.out_path.clone())?, + }) + } + + fn build(&self) -> Result<(), Error> { + let storage_file = self.out_path.join("storage"); + println!( + "Creating storage file for posts {} in file {}", + self.posts_index.display(), + storage_file.display() + ); + let posts: Posts = index::read( + fs::read_to_string(&self.posts_index) + .with_context(|| format!("Failed to read file {}", self.posts_index.display()))?, + ) + .with_context(|| format!("Failed to decode {}", self.posts_index.display()))?; + trace!("Generating storage from posts: {:#?}", posts); + storage::write(posts, &storage_file)?; + println!("Storage ready in file {}", storage_file.display()); + Ok(()) + } +} + +#[derive(Default)] +struct Crate { + s: Storage, + out_path: PathBuf, + crate_name: String, + engine_version: toml_edit::Table, + non_top_level: bool, +} + +impl Stage for Crate { + fn from_opt(opt: &Opt) -> Result { + if opt.crate_path.is_some() { + bail!("Don't use --crate-path to specify crate output dir!"); + } + let out_path = ensure_exists(opt.out_path.clone())?; + let storage_opt = { + let mut ret: Opt = opt.clone(); + ret.out_path = ensure_exists(out_path.join("src"))?; + ret + }; + + Ok(Self { + s: Storage::from_opt(&storage_opt)?, + out_path, + crate_name: opt.crate_name.clone(), + engine_version: opt.engine_version.clone(), + non_top_level: opt.non_top_level_crate, + }) + } + + fn build(&self) -> Result<(), Error> { + println!( + "Creating tinysearch implementation crate {} in directory {}", + self.crate_name, + self.out_path.display() + ); + let cargo_toml = self.out_path.join("Cargo.toml"); + let mut cargo_toml_contents = assets::CRATE_CARGO_TOML.parse::()?; + cargo_toml_contents["package"]["name"] = value(self.crate_name.clone()); + 
cargo_toml_contents["dependencies"]["tinysearch"] = + toml_edit::Item::Table(self.engine_version.clone()); + if self.non_top_level { + cargo_toml_contents.as_table_mut().remove("workspace"); + cargo_toml_contents.as_table_mut().remove("profile"); + cargo_toml_contents.as_table_mut().remove("lib"); + cargo_toml_contents["lib"] = toml_edit::table(); + } + fs::write(cargo_toml, cargo_toml_contents.to_string())?; + + // let mut file = fs::OpenOptions::new().write(true).truncate(true).open(&cargo_toml)?; + // file.write(new.as_bytes())?; + + self.s.build().context("Failed building storage")?; + fs::write( + self.out_path.join("src").join("lib.rs"), + assets::CRATE_LIB_RS, + )?; + println!("Crate content generated in {}/", &self.out_path.display()); + Ok(()) + } +} + +#[derive(Default)] +struct Wasm { + c: Crate, + out_path: PathBuf, + crate_path: DirOrTemp, + optimize: bool, +} + +impl Wasm { + fn ensure_crate_path(crate_path: &Option) -> Result { + Ok(match crate_path { + Some(p) => DirOrTemp::Path(ensure_exists(p.clone())?), + None => DirOrTemp::default(), + }) + } +} + +impl Stage for Wasm { + fn from_opt(opt: &Opt) -> Result { + let crate_path = Wasm::ensure_crate_path(&opt.crate_path)?; + let crate_opt = { + let mut ret: Opt = opt.clone(); + ret.out_path = crate_path.path(); + ret.crate_path = None; + ret + }; + Ok(Self { + c: Crate::from_opt(&crate_opt)?, + out_path: ensure_exists(opt.out_path.clone())?, + crate_path, + optimize: opt.optimize, + }) + } + + fn build(self: &Wasm) -> Result<(), Error> { + self.c.build().context("Failed generating crate")?; + println!("Compiling WASM module using wasm-pack"); + let crate_path = self.crate_path.path(); + run_output( + Command::new("wasm-pack") + .arg("build") + .arg(&crate_path) + .arg("--target") + .arg("web") + .arg("--release") + .arg("--out-dir") + .arg(&self.out_path), + )?; + let wasm_name = self.c.crate_name.replace('-', "_"); + + if self.optimize { + let wasm_file = format!("{}_bg.wasm", &wasm_name); + 
run_output( + Command::new("wasm-opt") + .current_dir(&self.out_path) + .arg("-Oz") + .arg("-o") + .arg(&wasm_file) + .arg(&wasm_file), + )?; + } + let html_path = self.out_path.join("demo.html"); + fs::write( + &html_path, + assets::DEMO_HTML.replace("{WASM_NAME}", &wasm_name), + ) + .with_context(|| format!("Failed writing demo.html to {}", &html_path.display()))?; + println!("All done! Open the output folder with a web server to try the demo."); + Ok(()) + } +} + +pub fn main() -> Result<(), Error> { + let opt: Opt = argh::from_env(); + + if opt.version { + println!("tinysearch {}", env!("CARGO_PKG_VERSION")); + std::process::exit(0); + } + + let parse_ctx = || { + format!( + "Failed to parse options for {} mode", + Into::<&'static str>::into(&opt.output_mode) + ) + }; + + match opt.output_mode { + OutputMode::Search => Search::from_opt(&opt).with_context(parse_ctx)?.build(), + OutputMode::Storage => Storage::from_opt(&opt).with_context(parse_ctx)?.build(), + OutputMode::Crate => Crate::from_opt(&opt).with_context(parse_ctx)?.build(), + OutputMode::Wasm => Wasm::from_opt(&opt).with_context(parse_ctx)?.build(), + } + .with_context(|| { + format!( + "Failed to build {} mode", + Into::<&'static str>::into(&opt.output_mode) + ) + }) +} + +pub fn run_output(cmd: &mut Command) -> Result { + println!("running {:?}", cmd); + let output = cmd + .stderr(Stdio::inherit()) + .output() + .with_context(|| format!("failed to run {:?}", cmd))?; + + if !output.status.success() { + anyhow::bail!("failed to execute {:?}\nstatus: {}", cmd, output.status) + } + Ok(String::from_utf8_lossy(&output.stdout).into_owned()) +} + +// #[cfg(test)] +// mod tests { +// use super::*; + +// #[test] +// fn test_compile_example(){ +// run_output( +// Command::new("/home/delphi/.cargo/bin/trunk") +// .current_dir("../examples/yew-example-storage") +// .arg("build") +// .arg("--release") +// ).unwrap(); +// } +// } diff --git a/tinysearch/src/bin/utils/assets.rs b/tinysearch/src/bin/utils/assets.rs 
new file mode 100644 index 0000000..a367b10 --- /dev/null +++ b/tinysearch/src/bin/utils/assets.rs @@ -0,0 +1,14 @@ +pub static CRATE_CARGO_TOML: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/crate/Cargo.toml" +)); +pub static CRATE_LIB_RS: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/crate/src/lib.rs" +)); + +// Include a bare-bones HTML page template that demonstrates how tinysearch is used +pub static DEMO_HTML: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/assets/demo.html")); + +pub static STOP_WORDS: &str = + include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/assets/stopwords")); diff --git a/bin/src/index.rs b/tinysearch/src/bin/utils/index.rs similarity index 100% rename from bin/src/index.rs rename to tinysearch/src/bin/utils/index.rs diff --git a/tinysearch/src/bin/utils/mod.rs b/tinysearch/src/bin/utils/mod.rs new file mode 100644 index 0000000..1c3b10c --- /dev/null +++ b/tinysearch/src/bin/utils/mod.rs @@ -0,0 +1,3 @@ +pub mod assets; +pub mod index; +pub mod storage; diff --git a/bin/src/storage.rs b/tinysearch/src/bin/utils/storage.rs similarity index 87% rename from bin/src/storage.rs rename to tinysearch/src/bin/utils/storage.rs index b0c9979..33249cd 100644 --- a/bin/src/storage.rs +++ b/tinysearch/src/bin/utils/storage.rs @@ -1,18 +1,20 @@ use anyhow::Error; use std::collections::{HashMap, HashSet}; use std::fs; +use std::path; -use crate::index::Posts; +use super::assets::STOP_WORDS; +use super::index::Posts; use strip_markdown::strip_markdown; -use tinysearch_shared::{Filters, PostId, Storage}; +use tinysearch::{Filters, PostId, Storage}; use xorf::HashProxy; -pub fn write(posts: Posts) -> Result<(), Error> { +pub fn write(posts: Posts, path: &path::PathBuf) -> Result<(), Error> { let filters = build(posts)?; trace!("Storage::from"); let storage = Storage::from(filters); trace!("Write"); - fs::write("storage", storage.to_bytes()?)?; + fs::write(path, storage.to_bytes()?)?; 
trace!("ok"); Ok(()) } @@ -45,8 +47,7 @@ pub fn generate_filters(posts: HashMap>) -> Result = stopwords.split_whitespace().map(String::from).collect(); + let stopwords: HashSet = STOP_WORDS.split_whitespace().map(String::from).collect(); let split_posts: HashMap>> = posts .into_iter() @@ -97,11 +98,11 @@ mod tests { let mut posts = HashMap::new(); posts.insert( ( - "Maybe You Don't Need Kubernetes, Or Excel - You Know".to_string(),//title - "".to_string(),//url - None,//meta + "Maybe You Don't Need Kubernetes, Or Excel - You Know".to_string(), //title + "".to_string(), //url + None, //meta ), - None,//body + None, //body ); let filters = generate_filters(posts).unwrap(); assert_eq!(filters.len(), 1); diff --git a/tinysearch/src/lib.rs b/tinysearch/src/lib.rs new file mode 100644 index 0000000..8d85e7d --- /dev/null +++ b/tinysearch/src/lib.rs @@ -0,0 +1,83 @@ +use bincode::Error as BincodeError; +use serde::{Deserialize, Serialize}; +use std::cmp::Reverse; +use std::collections::hash_map::DefaultHasher; +use std::convert::From; +use xorf::{Filter as XorfFilter, HashProxy, Xor8}; + +type Title = String; +type Url = String; +type Meta = Option; +pub type PostId = (Title, Url, Meta); +pub type PostFilter = (PostId, HashProxy); +pub type Filters = Vec; + +#[derive(Serialize, Deserialize)] +pub struct Storage { + pub filters: Filters, +} + +impl From for Storage { + fn from(filters: Filters) -> Self { + Storage { filters } + } +} + +pub trait Score { + fn score(&self, terms: &[String]) -> usize; +} + +// the score denotes the number of terms from the query that are contained in the +// current filter +impl Score for HashProxy { + fn score(&self, terms: &[String]) -> usize { + terms.iter().filter(|term| self.contains(term)).count() + } +} + +impl Storage { + pub fn to_bytes(&self) -> Result, BincodeError> { + let encoded: Vec = bincode::serialize(&self)?; + Ok(encoded) + } + + pub fn from_bytes(bytes: &[u8]) -> Result { + let decoded: Filters = 
bincode::deserialize(bytes)?; + Ok(Storage { filters: decoded }) + } +} + +pub type Filter = HashProxy; + +const TITLE_WEIGHT: usize = 3; + +// Wrapper around filter score, that also scores the post title +// Post title score has a higher weight than post body +fn score(title: &str, search_terms: &[String], filter: &Filter) -> usize { + let title_terms: Vec = tokenize(title); + let title_score: usize = search_terms + .iter() + .filter(|term| title_terms.contains(term)) + .count(); + TITLE_WEIGHT * title_score + filter.score(search_terms) +} + +fn tokenize(s: &str) -> Vec { + s.to_lowercase() + .split_whitespace() + .filter(|&t| !t.trim().is_empty()) + .map(String::from) + .collect() +} +pub fn search(filters: &'_ Filters, query: String, num_results: usize) -> Vec<&'_ PostId> { + let search_terms: Vec = tokenize(&query); + let mut matches: Vec<(&PostId, usize)> = filters + .iter() + .map(|(post_id, filter)| (post_id, score(&post_id.0, &search_terms, filter))) + .filter(|(_post_id, score)| *score > 0) + .collect(); + + matches.sort_by_key(|k| Reverse(k.1)); + + matches.into_iter().take(num_results).map(|p| p.0).collect() +} From 3e22166f0e4b708ed082ee2bba1385e78e5fed1d Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 13:57:20 +0200 Subject: [PATCH 15/58] Update install command --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 585965e..ad0df57 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ build: ### Compile project .PHONY: install install: ## Install tinysearch - cargo install --force --path tinysearch + cargo install --force --path tinysearch --features=bin .PHONY: test test: ## Run unit tests From 4de60a82814440424ecb0a920731d45072a0353a Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:27:55 +0200 Subject: [PATCH 16/58] cleanup --- report.json | 26 -------------------------- testscript.sh | 6 ------ 2 files changed, 32 deletions(-) delete mode 100644 report.json delete 
mode 100755 testscript.sh diff --git a/report.json b/report.json deleted file mode 100644 index 940d8ff..0000000 --- a/report.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "detailed_stats": false, - "total": 40, - "successful": 39, - "unknown": 0, - "unsupported": 0, - "timeouts": 0, - "redirects": 0, - "excludes": 0, - "errors": 1, - "cached": 0, - "success_map": {}, - "fail_map": { - "./README.md": [ - { - "url": "http://0.0.0.0:8000/demo.html", - "status": { - "text": "Failed: Network error", - "code": 404 - } - } - ] - }, - "suggestion_map": {}, - "excluded_map": {} -} \ No newline at end of file diff --git a/testscript.sh b/testscript.sh deleted file mode 100755 index 976e2f1..0000000 --- a/testscript.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -# - -for example in `ls -1 examples`; do - (cd "examples/$example" && trunk build index.html) || exit 1; -done From f2dfbc972d241707b5c62863cd77118ab6016857 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:27:59 +0200 Subject: [PATCH 17/58] bump version --- Cargo.lock | 2 +- tinysearch/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ca5e0a..f0bf4e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -351,7 +351,7 @@ dependencies = [ [[package]] name = "tinysearch" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "argh", diff --git a/tinysearch/Cargo.toml b/tinysearch/Cargo.toml index f2b254d..65b2503 100644 --- a/tinysearch/Cargo.toml +++ b/tinysearch/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tinysearch" authors = ["Matthias Endler "] -version = "0.7.0" +version = "0.8.0" edition = "2021" description = "A tiny search engine for static websites" license = "Apache-2.0/MIT" From d83c852583108cd4586927d7507358e806db7f61 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:31:35 +0200 Subject: [PATCH 18/58] cleanup --- engine/Cargo.lock | 260 ---------------------------------------------- 1 file changed, 260 
deletions(-) delete mode 100644 engine/Cargo.lock diff --git a/engine/Cargo.lock b/engine/Cargo.lock deleted file mode 100644 index a2f3dd3..0000000 --- a/engine/Cargo.lock +++ /dev/null @@ -1,260 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - -[[package]] -name = "bumpalo" -version = "3.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "itoa" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.112" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" - -[[package]] -name = "log" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "memory_units" 
-version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8452105ba047068f40ff7093dd1d9da90898e63dd61736462e9cdda6a90ad3c3" - -[[package]] -name = "once_cell" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" - -[[package]] -name = "proc-macro2" -version = "1.0.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "ryu" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" - -[[package]] -name = "serde" -version = "1.0.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9875c23cf305cd1fd7eb77234cbb705f21ea6a72c637a5c6db5fe4b8e7f008" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc0db5cb2556c0e558887d9bbdcf6ac4471e83ff66cf696e5419024d1606276" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "syn" -version = "1.0.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecb2e6da8ee5eb9a61068762a32fa9619cc591ceb055b3687f4cd4051ec2e06b" -dependencies = [ - 
"proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "tinysearch-engine" -version = "0.6.4" -dependencies = [ - "once_cell", - "tinysearch-shared", - "wasm-bindgen", - "wee_alloc", - "xorf", -] - -[[package]] -name = "tinysearch-shared" -version = "0.6.4" -dependencies = [ - "bincode", - "serde", - "xorf", -] - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "wasm-bindgen" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" -dependencies = [ - "cfg-if 1.0.0", - "serde", - "serde_json", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" - -[[package]] -name = "wee_alloc" 
-version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb3b5a6b2bb17cb6ad44a2e68a43e8d2722c997da10e928665c72ec6c0a0b8e" -dependencies = [ - "cfg-if 0.1.10", - "libc", - "memory_units", - "winapi", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "xorf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb3a52aef0211e557044386369919b033004fdcf4c56c5017710f97fa9aa3c" -dependencies = [ - "serde", -] From 2206d32e6aad2f60ebe7117ac991a89fe44709ea Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:35:50 +0200 Subject: [PATCH 19/58] update .gitignore --- .gitignore | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 27a0c23..eb13164 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,4 @@ -target +target/ +.vscode storage -tinysearch_engine.js -tinysearch_engine.d.ts -tinysearch_engine_bg.wasm -tinysearch_engine_bg.d.ts -tinysearch_engine_bg.wasm.d.ts -package.json -pkg/ wasm_output/ -.vscode -tinysearch-engine From 25df8b5b013eef835bbe3add92821abc68aee7ca Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:36:07 +0200 Subject: [PATCH 20/58] Clean up wasm_output --- Makefile | 1 + 1 file changed, 1 insertion(+) 
diff --git a/Makefile b/Makefile index ad0df57..6f5f5e8 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ lint: ### Lint project using clippy .PHONY: clean clean: ### Clean up build artifacts cargo clean + rm -rf wasm_output .PHONY: build build: ### Compile project From 040c109fca9d13c1c6181582b707466c74c4e7bd Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:37:47 +0200 Subject: [PATCH 21/58] Update lint command --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6f5f5e8..39cd5e4 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ help: ## This help message .PHONY: lint lint: ### Lint project using clippy - cargo clippy + cargo clippy --all-targets --all-features -- -D warnings .PHONY: clean clean: ### Clean up build artifacts From a01413133b16047a855336a6c2e2d0b76809e898 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 14:55:48 +0200 Subject: [PATCH 22/58] Update Docker build --- Dockerfile | 2 +- Makefile | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 0e3d8e6..c8fb45f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,7 @@ RUN terser --version RUN cd /tmp && git clone --branch "$TINY_BRANCH" "$TINY_REPO" RUN set -ex -o pipefail; \ cd /tmp/tinysearch \ - && cargo build --release \ + && cargo build --release --features=bin \ && cp target/release/tinysearch $CARGO_HOME/bin RUN curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh diff --git a/Makefile b/Makefile index 39cd5e4..29b03d8 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,10 @@ clean: ### Clean up build artifacts build: ### Compile project cargo build +.PHONY: build-docker +build-docker: ### Build Docker image + docker build -t tinysearch/cli . 
+ .PHONY: install install: ## Install tinysearch cargo install --force --path tinysearch --features=bin From 9efff529936f25cd3b18cc48cddf29856fc85943 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 15:03:03 +0200 Subject: [PATCH 23/58] cleanup --- tinysearch/.gitignore | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 tinysearch/.gitignore diff --git a/tinysearch/.gitignore b/tinysearch/.gitignore deleted file mode 100644 index a52bb12..0000000 --- a/tinysearch/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -storage -tinysearch_engine.js -tinysearch_engine.d.ts -tinysearch_engine_bg.wasm -tinysearch_engine_bg.d.ts -package.json From ef2e4ef6da691bbe1cdca8f3c48c66585cbd3d04 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 15:03:13 +0200 Subject: [PATCH 24/58] Update Make targets --- Makefile | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 29b03d8..1c37356 100644 --- a/Makefile +++ b/Makefile @@ -32,13 +32,4 @@ test: ## Run unit tests .PHONY: run run: ## Run tinysearch with sample input - cargo run -- fixtures/index.json - -.PHONY: pack -pack: ## Pack tinysearch node module - wasm-pack build tinysearch - wasm-pack pack - -.PHONY: publish -publish: pack ## Publish tinysearch to NPM - wasm-pack publish \ No newline at end of file + cargo run --features="bin" -- fixtures/index.json From 02578bfe4d7b098766328ffd0f50eb33f4b7b048 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 15:11:31 +0200 Subject: [PATCH 25/58] spell out `engine-version` argument --- .github/workflows/ci.yml | 2 +- Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f6b5b1..07347f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,7 +111,7 @@ jobs: continue-on-error: ${{ matrix.rust == 'nightly' }} - name: Build WASM package from test index - run: RUST_LOG=debug tinysearch -e 'path= 
"'$PWD'/tinysearch"' fixtures/index.json + run: RUST_LOG=debug tinysearch --engine-version 'path= "'$PWD'/tinysearch"' fixtures/index.json continue-on-error: ${{ matrix.rust == 'nightly' }} - name: Build examples diff --git a/Dockerfile b/Dockerfile index c8fb45f..b3b2454 100644 --- a/Dockerfile +++ b/Dockerfile @@ -58,7 +58,7 @@ COPY --from=binary-build /usr/local/cargo/bin/ /usr/local/bin/ # crate cache init. No need to download crate for future usage RUN set -eux -o pipefail; \ echo '[{"title":"","body":"","url":""}]' > build.json; \ - tinysearch build.json; \ + tinysearch --engine-version 'path= "'$PWD'/tinysearch"' build.json; \ rm -rf /tmp/* ENTRYPOINT ["tinysearch"] From e641b22b777d5ea94963a544a698028c8557fecf Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 15:27:08 +0200 Subject: [PATCH 26/58] Copy tinysearch build directory to be used as engine --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index b3b2454..32443b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,6 +54,8 @@ RUN set -eux -o pipefail; \ COPY --from=binary-build /usr/local/bin/ /usr/local/bin/ COPY --from=binary-build /usr/local/cargo/bin/ /usr/local/bin/ +# Copy tinysearch build directory to be used as engine (see `--engine-version` option below) +COPY --from=binary-build /tmp/tinysearch/tinysearch/ tinysearch # crate cache init. 
No need to download crate for future usage RUN set -eux -o pipefail; \ From 6665de2708c09e1bea82dd5c16b61e88dd82853c Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 15:37:53 +0200 Subject: [PATCH 27/58] Follow Docker idioms --- Dockerfile | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index 32443b4..c614e85 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,8 +14,9 @@ ARG TINY_BRANCH WORKDIR /tmp +# Install dependencies RUN apk add --update --no-cache --virtual \ - build-dependencies \ + .build-deps \ musl-dev \ openssl-dev \ gcc \ @@ -25,42 +26,42 @@ RUN apk add --update --no-cache --virtual \ gcc \ ca-certificates \ libc6-compat \ - binaryen - -RUN set -eux -o pipefail; \ - ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1; \ - npm install terser -g; + binaryen && \ + ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1 && \ + npm install terser -g +# Verify the installation RUN terser --version -RUN cd /tmp && git clone --branch "$TINY_BRANCH" "$TINY_REPO" -RUN set -ex -o pipefail; \ - cd /tmp/tinysearch \ - && cargo build --release --features=bin \ - && cp target/release/tinysearch $CARGO_HOME/bin +# Clone the repo and build the binary +RUN git clone --branch "$TINY_BRANCH" "$TINY_REPO" /tmp/tinysearch && \ + cd /tmp/tinysearch && \ + cargo build --release --features=bin && \ + cp target/release/tinysearch $CARGO_HOME/bin +# Install wasm-pack RUN curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh +# Verify the installation RUN wasm-pack --version FROM $RUST_IMAGE -WORKDIR /tmp - -RUN apk add --update --no-cache libc6-compat musl-dev binaryen +WORKDIR /app -RUN set -eux -o pipefail; \ - ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1; +# Install runtime dependencies +RUN apk add --update --no-cache libc6-compat musl-dev binaryen openssl-dev && \ + ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1 +# Copy the build binaries and tinysearch directory 
COPY --from=binary-build /usr/local/bin/ /usr/local/bin/ COPY --from=binary-build /usr/local/cargo/bin/ /usr/local/bin/ # Copy tinysearch build directory to be used as engine (see `--engine-version` option below) -COPY --from=binary-build /tmp/tinysearch/tinysearch/ tinysearch +COPY --from=binary-build /tmp/tinysearch/tinysearch/ /app/tinysearch -# crate cache init. No need to download crate for future usage -RUN set -eux -o pipefail; \ - echo '[{"title":"","body":"","url":""}]' > build.json; \ - tinysearch --engine-version 'path= "'$PWD'/tinysearch"' build.json; \ +# Initialize crate cache +RUN echo '[{"title":"","body":"","url":""}]' > build.json && \ + tinysearch --engine-version 'path= "'$PWD'/tinysearch"' build.json && \ rm -rf /tmp/* ENTRYPOINT ["tinysearch"] From 4840ba07213f8693891d5b44a28bf7e346ff1f22 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 16:02:08 +0200 Subject: [PATCH 28/58] Update build command to include `bin` --- .github/workflows/release.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6665e2c..4fb4435 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,7 +67,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: build - args: --release --target ${{ matrix.target }} + args: --release --features=bin --target ${{ matrix.target }} use-cross: true - name: Optimize and package binary diff --git a/Makefile b/Makefile index 1c37356..81b51f2 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ clean: ### Clean up build artifacts .PHONY: build build: ### Compile project - cargo build + cargo build --features=bin .PHONY: build-docker build-docker: ### Build Docker image From a9228b53dc02aec40ef17e7fdcca7d3cccbae365 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 18:45:49 +0200 Subject: [PATCH 29/58] fix path --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07347f5..58e2aa4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,7 +151,7 @@ jobs: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} with: command: publish - args: --manifest-path bin/Cargo.toml + args: --manifest-path tinysearch/Cargo.toml - name: Create Github release for Linux, Windows, and macOS uses: softprops/action-gh-release@v1 From 41b049ac420469e9e29f419ebf1fce91023e42ed Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 19:16:09 +0200 Subject: [PATCH 30/58] Include all src files in release package --- tinysearch/Cargo.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tinysearch/Cargo.toml b/tinysearch/Cargo.toml index 65b2503..2527f98 100644 --- a/tinysearch/Cargo.toml +++ b/tinysearch/Cargo.toml @@ -8,7 +8,9 @@ license = "Apache-2.0/MIT" documentation = "https://github.com/mre/tinysearch/blob/master/README.md" homepage = "https://github.com/mre/tinysearch" repository = "https://github.com/mre/tinysearch" -include = ["/src/assets"] + +include = ["src/**/*"] + [lib] [[bin]] @@ -38,7 +40,7 @@ features = ["derive"] [dependencies.xorf] version = "0.8.1" -default-features = false +default-features = false features = ["serde"] From b57ae2215bf05db4d3b71781b3726891592d3f24 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 19:18:31 +0200 Subject: [PATCH 31/58] Add publish dry run --- .github/workflows/ci.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58e2aa4..3e15eee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,7 +133,6 @@ jobs: args: --all-features --no-deps publish: - if: startsWith(github.ref, 'refs/tags/') needs: - test runs-on: ubuntu-latest @@ -145,7 +144,16 @@ jobs: with: command: fetch + - name: Publish (dry run) + uses: actions-rs/cargo@v1 + env: + 
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + with: + command: publish + args: --manifest-path tinysearch/Cargo.toml --dry-run + - name: Publish + if: startsWith(github.ref, 'refs/tags/') uses: actions-rs/cargo@v1 env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} @@ -154,6 +162,7 @@ jobs: args: --manifest-path tinysearch/Cargo.toml - name: Create Github release for Linux, Windows, and macOS + if: startsWith(github.ref, 'refs/tags/') uses: softprops/action-gh-release@v1 with: prerelease: true From 42ed9c8bd9ac94ca8230aea472b83c417c36a618 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 24 Jul 2023 19:20:18 +0200 Subject: [PATCH 32/58] formatting --- .github/workflows/release.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4fb4435..039126c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,5 @@ -name: release +name: Release + on: release: types: From 2c26d3ac2f3f886063c916ed68a18ab4ec1d1b91 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Tue, 25 Jul 2023 11:38:47 +0200 Subject: [PATCH 33/58] Remove workspace (#171) --- .github/workflows/ci.yml | 7 +- Cargo.toml | 64 ++++++++- Dockerfile | 2 +- Makefile | 2 +- .../assets => assets}/crate/Cargo.toml | 0 .../assets => assets}/crate/src/lib.rs | 3 +- {tinysearch/assets => assets}/demo.html | 0 {tinysearch/assets => assets}/stopwords | 0 examples/yew-example-crate/Cargo.toml | 4 +- examples/yew-example-crate/Trunk.toml | 2 +- examples/yew-example-storage/Cargo.lock | 134 ++++++++++-------- examples/yew-example-storage/Cargo.toml | 4 +- examples/yew-example-storage/Trunk.toml | 5 +- {tinysearch/src => src}/bin/tinysearch.rs | 0 {tinysearch/src => src}/bin/utils/assets.rs | 0 {tinysearch/src => src}/bin/utils/index.rs | 0 {tinysearch/src => src}/bin/utils/mod.rs | 0 {tinysearch/src => src}/bin/utils/storage.rs | 0 {tinysearch/src => src}/lib.rs | 0 
tinysearch/Cargo.toml | 60 -------- 20 files changed, 146 insertions(+), 141 deletions(-) rename {tinysearch/assets => assets}/crate/Cargo.toml (100%) rename {tinysearch/assets => assets}/crate/src/lib.rs (92%) rename {tinysearch/assets => assets}/demo.html (100%) rename {tinysearch/assets => assets}/stopwords (100%) rename {tinysearch/src => src}/bin/tinysearch.rs (100%) rename {tinysearch/src => src}/bin/utils/assets.rs (100%) rename {tinysearch/src => src}/bin/utils/index.rs (100%) rename {tinysearch/src => src}/bin/utils/mod.rs (100%) rename {tinysearch/src => src}/bin/utils/storage.rs (100%) rename {tinysearch/src => src}/lib.rs (100%) delete mode 100644 tinysearch/Cargo.toml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3e15eee..d5b2829 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -107,11 +107,11 @@ jobs: args: --all-features - name: Build and install tinysearch - run: cargo install --force --path tinysearch --features=bin + run: cargo install --force --path . 
--features=bin continue-on-error: ${{ matrix.rust == 'nightly' }} - name: Build WASM package from test index - run: RUST_LOG=debug tinysearch --engine-version 'path= "'$PWD'/tinysearch"' fixtures/index.json + run: RUST_LOG=debug tinysearch --engine-version 'path= "'$PWD'"' fixtures/index.json continue-on-error: ${{ matrix.rust == 'nightly' }} - name: Build examples @@ -150,7 +150,7 @@ jobs: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} with: command: publish - args: --manifest-path tinysearch/Cargo.toml --dry-run + args: --dry-run - name: Publish if: startsWith(github.ref, 'refs/tags/') @@ -159,7 +159,6 @@ jobs: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} with: command: publish - args: --manifest-path tinysearch/Cargo.toml - name: Create Github release for Linux, Windows, and macOS if: startsWith(github.ref, 'refs/tags/') diff --git a/Cargo.toml b/Cargo.toml index e9e9a5a..9249291 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,60 @@ -[workspace] -members = [ - "tinysearch", -] \ No newline at end of file +[package] +name = "tinysearch" +authors = ["Matthias Endler "] +version = "0.8.0" +edition = "2021" +description = "A tiny search engine for static websites" +license = "Apache-2.0/MIT" +documentation = "https://github.com/mre/tinysearch/blob/master/README.md" +homepage = "https://github.com/mre/tinysearch" +repository = "https://github.com/mre/tinysearch" + +include = ["src/**/*"] + +[lib] + +[[bin]] +name = "tinysearch" +required-features = ["bin"] + +[dependencies] +bincode = "1.3.3" + + +argh = { version = "0.1.10", optional = true } +log = { version = "0.4.19", optional = true } +serde_json = { version = "1.0.103", optional = true } +anyhow = { version = "1.0.72", optional = true } +tempfile = { version = "3.7.0", optional = true } + +serde_derive = { version = "1.0.175", optional = true } +phf = { version = "0.11.2", optional = true } +strip_markdown = { version = "0.2.0", optional = true } +strum = { version = "0.25.0", 
features = ["derive"], optional = true } +toml_edit = { version = "0.19.14", optional = true } + + +[dependencies.serde] +version = "1.0.175" +features = ["derive"] + +[dependencies.xorf] +version = "0.8.1" +default-features = false +features = ["serde"] + + +[features] +default = [] +bin = [ + "argh", + "log", + "serde_json", + "anyhow", + "tempfile", + "serde_derive", + "phf", + "strip_markdown", + "strum", + "toml_edit", +] diff --git a/Dockerfile b/Dockerfile index c614e85..90ca22d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,7 +61,7 @@ COPY --from=binary-build /tmp/tinysearch/tinysearch/ /app/tinysearch # Initialize crate cache RUN echo '[{"title":"","body":"","url":""}]' > build.json && \ - tinysearch --engine-version 'path= "'$PWD'/tinysearch"' build.json && \ + tinysearch --engine-version 'path= "'$PWD'"' build.json && \ rm -rf /tmp/* ENTRYPOINT ["tinysearch"] diff --git a/Makefile b/Makefile index 81b51f2..b5026f1 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ build-docker: ### Build Docker image .PHONY: install install: ## Install tinysearch - cargo install --force --path tinysearch --features=bin + cargo install --force --path . 
--features=bin .PHONY: test test: ## Run unit tests diff --git a/tinysearch/assets/crate/Cargo.toml b/assets/crate/Cargo.toml similarity index 100% rename from tinysearch/assets/crate/Cargo.toml rename to assets/crate/Cargo.toml diff --git a/tinysearch/assets/crate/src/lib.rs b/assets/crate/src/lib.rs similarity index 92% rename from tinysearch/assets/crate/src/lib.rs rename to assets/crate/src/lib.rs index 3556be5..d8ef1b5 100644 --- a/tinysearch/assets/crate/src/lib.rs +++ b/assets/crate/src/lib.rs @@ -23,5 +23,6 @@ pub fn search_local(query: String, num_results: usize) -> Vec<&'static PostId> { #[cfg(feature = "bind")] #[wasm_bindgen] pub fn search(query: String, num_results: usize) -> JsValue { - serde_wasm_bindgen::to_value(&search_local(query, num_results)).unwrap() + serde_wasm_bindgen::to_value(&search_local(query, num_results)) + .expect("failed to serialize search result") } diff --git a/tinysearch/assets/demo.html b/assets/demo.html similarity index 100% rename from tinysearch/assets/demo.html rename to assets/demo.html diff --git a/tinysearch/assets/stopwords b/assets/stopwords similarity index 100% rename from tinysearch/assets/stopwords rename to assets/stopwords diff --git a/examples/yew-example-crate/Cargo.toml b/examples/yew-example-crate/Cargo.toml index adfd90e..2eb63c1 100644 --- a/examples/yew-example-crate/Cargo.toml +++ b/examples/yew-example-crate/Cargo.toml @@ -9,8 +9,8 @@ console_error_panic_hook = "0.1" wasm-bindgen = "0.2" ybc = "0.2" yew = "0.18" -example_search = {path = "./example-search", features=[]} +example_search = { path = "./example-search", features = [] } [workspace] -members = [] \ No newline at end of file +members = [] diff --git a/examples/yew-example-crate/Trunk.toml b/examples/yew-example-crate/Trunk.toml index 0613947..c6ffd71 100644 --- a/examples/yew-example-crate/Trunk.toml +++ b/examples/yew-example-crate/Trunk.toml @@ -10,4 +10,4 @@ stage = "pre_build" command = "sh" command_arguments = [ "-c", - "cd ../../ && 
cargo run --features=bin --release -- -m crate -p examples/yew-example-crate/example-search/ --crate-name example_search --engine-version 'path=\"../../../tinysearch\"' --non-top-level-crate fixtures/index.json"] + "cd ../../ && cargo run --features=bin --release -- -m crate -p examples/yew-example-crate/example-search/ --crate-name example_search --engine-version 'path=\"../../..\"' --non-top-level-crate fixtures/index.json"] diff --git a/examples/yew-example-storage/Cargo.lock b/examples/yew-example-storage/Cargo.lock index 8d62f2c..4363557 100644 --- a/examples/yew-example-storage/Cargo.lock +++ b/examples/yew-example-storage/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "anyhow" -version = "1.0.69" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" [[package]] name = "anymap" @@ -49,9 +49,9 @@ checksum = "cfa8873f51c92e232f9bac4065cddef41b714152812bfc5f7672ba16d6ef8cd9" [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "bytes" @@ -97,7 +97,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn", + "syn 1.0.109", ] [[package]] @@ -178,9 +178,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", @@ -188,15 +188,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.9" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -222,12 +222,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "memchr" @@ -237,9 +234,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "nom" -version = "5.1.2" +version = "5.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" dependencies = [ "lexical-core", "memchr", @@ -248,24 +245,24 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" 
dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] @@ -281,41 +278,41 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "semver" -version = "1.0.16" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.152" +version = "1.0.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.152" +version = "1.0.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.27", ] [[package]] name = "serde_json" -version = "1.0.93" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" dependencies = [ "itoa", "ryu", 
@@ -348,29 +345,40 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "thiserror" -version = "1.0.38" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.27", ] [[package]] name = "tinysearch" -version = "0.7.0" +version = "0.8.0" dependencies = [ "bincode", "serde", @@ -379,9 +387,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.7" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775c11906edafc97bc378816b94585fbd9a054eabaf86fdd0ced94af449efab7" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "version_check" @@ -391,9 +399,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "serde", @@ -403,24 +411,24 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.27", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.34" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -430,9 +438,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -440,28 +448,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.27", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.64" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -469,9 +477,9 @@ dependencies = [ [[package]] name = "xorf" -version = "0.7.2" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0b223640dfbc22009679ce79f6777ae1db79add62a07345605777f69274a052" +checksum = "57901b00e3f8e14f4d20b8955bf8087ecb545cfe2ed8741c2a2dbc89847a1a29" dependencies = [ "serde", ] @@ -539,7 +547,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -571,7 +579,7 @@ checksum = "4c0ace2924b7a175e2d1c0e62ee7022a5ad840040dcd52414ce5f410ab322dba" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "yew-router-route-parser", ] diff --git a/examples/yew-example-storage/Cargo.toml b/examples/yew-example-storage/Cargo.toml index b33550a..357c368 100644 --- a/examples/yew-example-storage/Cargo.toml +++ b/examples/yew-example-storage/Cargo.toml @@ -10,7 +10,7 @@ wasm-bindgen = "0.2" ybc = "0.2" yew = "0.18" once_cell = "*" -tinysearch = {path="../../tinysearch"} +tinysearch = { path = "../.." 
} [workspace] -members = [] \ No newline at end of file +members = [] diff --git a/examples/yew-example-storage/Trunk.toml b/examples/yew-example-storage/Trunk.toml index 62d4bb9..7660a47 100644 --- a/examples/yew-example-storage/Trunk.toml +++ b/examples/yew-example-storage/Trunk.toml @@ -6,5 +6,6 @@ dist = "dist" stage = "pre_build" command = "sh" command_arguments = [ - "-c", - "cd ../../ && cargo run --features=bin --release -- -m storage -p examples/yew-example-storage/src/ fixtures/index.json"] + "-c", + "cd ../../ && cargo run --features=bin --release -- -m storage --engine-version 'path=\"../../..\"' -p examples/yew-example-storage/src/ fixtures/index.json", +] diff --git a/tinysearch/src/bin/tinysearch.rs b/src/bin/tinysearch.rs similarity index 100% rename from tinysearch/src/bin/tinysearch.rs rename to src/bin/tinysearch.rs diff --git a/tinysearch/src/bin/utils/assets.rs b/src/bin/utils/assets.rs similarity index 100% rename from tinysearch/src/bin/utils/assets.rs rename to src/bin/utils/assets.rs diff --git a/tinysearch/src/bin/utils/index.rs b/src/bin/utils/index.rs similarity index 100% rename from tinysearch/src/bin/utils/index.rs rename to src/bin/utils/index.rs diff --git a/tinysearch/src/bin/utils/mod.rs b/src/bin/utils/mod.rs similarity index 100% rename from tinysearch/src/bin/utils/mod.rs rename to src/bin/utils/mod.rs diff --git a/tinysearch/src/bin/utils/storage.rs b/src/bin/utils/storage.rs similarity index 100% rename from tinysearch/src/bin/utils/storage.rs rename to src/bin/utils/storage.rs diff --git a/tinysearch/src/lib.rs b/src/lib.rs similarity index 100% rename from tinysearch/src/lib.rs rename to src/lib.rs diff --git a/tinysearch/Cargo.toml b/tinysearch/Cargo.toml deleted file mode 100644 index 2527f98..0000000 --- a/tinysearch/Cargo.toml +++ /dev/null @@ -1,60 +0,0 @@ -[package] -name = "tinysearch" -authors = ["Matthias Endler "] -version = "0.8.0" -edition = "2021" -description = "A tiny search engine for static websites" 
-license = "Apache-2.0/MIT" -documentation = "https://github.com/mre/tinysearch/blob/master/README.md" -homepage = "https://github.com/mre/tinysearch" -repository = "https://github.com/mre/tinysearch" - -include = ["src/**/*"] - -[lib] - -[[bin]] -name = "tinysearch" -required-features = ["bin"] - -[dependencies] -bincode = "1.3.3" - - -argh = { version = "0.1.10", optional = true } -log = { version = "0.4.19", optional = true } -serde_json = { version = "1.0.103", optional = true } -anyhow = { version = "1.0.72", optional = true } -tempfile = { version = "3.7.0", optional = true } - -serde_derive = { version = "1.0.175", optional = true } -phf = { version = "0.11.2", optional = true } -strip_markdown = { version = "0.2.0", optional = true } -strum = { version = "0.25.0", features = ["derive"], optional = true } -toml_edit = { version = "0.19.14", optional = true } - - -[dependencies.serde] -version = "1.0.175" -features = ["derive"] - -[dependencies.xorf] -version = "0.8.1" -default-features = false -features = ["serde"] - - -[features] -default = [] -bin = [ - "argh", - "log", - "serde_json", - "anyhow", - "tempfile", - "serde_derive", - "phf", - "strip_markdown", - "strum", - "toml_edit", -] From 9eae48f128b99b71c17d712d3bc4f6078db3662a Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Tue, 25 Jul 2023 11:44:26 +0200 Subject: [PATCH 34/58] update docs (#172) --- README.md | 8 +++++--- demo.html | 17 +++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 48ca46c..9b5109a 100644 --- a/README.md +++ b/README.md @@ -81,15 +81,14 @@ cargo install tinysearch A JSON file, which contains the content to index, is required as an input. Please take a look at the [example file](fixtures/index.json). +ℹ️ The `body` field in the JSON document is optional and can be skipped to just index post titles. 
+ Once you created the index, you can run ``` tinysearch fixtures/index.json ``` -ℹ️ You can take a look at the code examples for different static site generators [here](https://github.com/mre/tinysearch/tree/master/howto). -ℹ️ The `body` field in the JSON document is optional and can be skipped to just index post titles. - This will create a WASM module and the JavaScript glue code to integrate it into your website. You can open the `demo.html` from any webserver to see the result. @@ -102,6 +101,9 @@ python3 -m http.server then browse to http://0.0.0.0:8000/demo.html to run the demo. +You can also take a look at the code examples for different static site +generators [here](https://github.com/mre/tinysearch/tree/master/howto). + ## Advanced Usage For advanced usage options, run diff --git a/demo.html b/demo.html index 1ce1cd6..1399a4c 100644 --- a/demo.html +++ b/demo.html @@ -11,22 +11,22 @@ // that we have compiled. // // Note that the `default` import is an initialization function which - // will "boot" the module and make it ready to use. Currently browsers + // will "boot" the module and make it ready to use. Currently, browsers // don't support natively imported WebAssembly as an ES module, but // eventually the manual initialization won't be required! - // import { search, default as init } from './tinysearch_engine.js'; import { search, default as init } from './tinysearch_engine.js'; + window.search = search; async function run() { - // First up we need to actually load the wasm file, so we use the - // default export to inform it where the wasm file is located on the - // server, and then we wait on the returned promise to wait for the - // wasm to be loaded. + // First up we need to actually load the WASM file, so we use the + // default export to inform the script where the WASM file is located on + // the server. Then we await the returned promise to wait for the + // WASM content to be loaded. 
// // Note that instead of a string here you can also pass in an instance // of `WebAssembly.Module` which allows you to compile your own module. - // Also note that the promise, when resolved, yields the wasm module's + // Also note that the promise, when resolved, yields the WASM module's // exports which is the same as importing the `*_bg` module in other // modes await init('./tinysearch_engine_bg.wasm'); @@ -36,11 +36,12 @@ - - - -

Search

- -

Results

-
    -
+ - \ No newline at end of file diff --git a/demo.html b/demo.html deleted file mode 100644 index 1399a4c..0000000 --- a/demo.html +++ /dev/null @@ -1,73 +0,0 @@ - - - - - - - - - - - - -

Search

- -

Results

-
    -
- - - \ No newline at end of file diff --git a/src/bin/tinysearch.rs b/src/bin/tinysearch.rs index b5c7304..eb5ee1a 100644 --- a/src/bin/tinysearch.rs +++ b/src/bin/tinysearch.rs @@ -7,7 +7,7 @@ use utils::assets; use utils::index; use utils::storage; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; pub use anyhow::{Error, Result}; use argh::FromArgs; use std::path::PathBuf; @@ -15,7 +15,7 @@ use std::process::{Command, Stdio}; use std::str::FromStr; use std::{env, fs}; use tempfile::TempDir; -use toml_edit::{value, Document}; +use toml_edit::{DocumentMut, value}; use index::Posts; use strum::{EnumString, IntoStaticStr}; @@ -26,9 +26,9 @@ fn ensure_exists(path: PathBuf) -> Result { } let path = path.canonicalize()?; if !path.exists() { - for path in fs::read_dir(&path)? { - println!("Name: {}", path.unwrap().path().display()) - } + fs::read_dir(&path)? + .map(|entry| entry.unwrap().path()) + .for_each(|path| println!("Name: {}", path.display())); bail!("Directory could not be created at {}", &path.display()); } Ok(path) @@ -73,7 +73,7 @@ enum OutputMode { } fn parse_engine_version(str: &str) -> Result { - let doc = str.parse::().map_err(|e| e.to_string())?; + let doc = str.parse::().map_err(|e| e.to_string())?; Ok(doc.as_table().clone()) } @@ -93,6 +93,10 @@ struct Opt { #[argh(switch)] version: bool, + /// create production-ready output without demo files + #[argh(switch)] + release: bool, + /// output mode #[argh(option, short = 'm', long = "mode", default = "OutputMode::Wasm")] output_mode: OutputMode, @@ -140,7 +144,7 @@ struct Opt { short = 'e', long = "engine-version", from_str_fn(parse_engine_version), - default = "format!(\"version=\\\"{}\\\"\", env!(\"CARGO_PKG_VERSION\")).parse::().unwrap().as_table().clone()" + default = "format!(\"version=\\\"{}\\\"\", env!(\"CARGO_PKG_VERSION\")).parse::().unwrap().as_table().clone()" )] engine_version: toml_edit::Table, @@ -184,18 +188,18 @@ impl Stage for Search { } fn build(&self) -> Result<(), Error> { - 
use tinysearch::{search as base_search, Storage}; + use tinysearch::{Storage, search as base_search}; let bytes = fs::read(&self.storage_file).with_context(|| { format!("Failed to read input file: {}", self.storage_file.display()) })?; let filters = Storage::from_bytes(&bytes)?.filters; let results = base_search(&filters, self.term.clone(), self.num_searches); - for result in results { + results.iter().for_each(|result| { println!( "Title: {}, Url: {}, Meta: {:?}", result.0, result.1, result.2 ); - } + }); Ok(()) } } @@ -270,7 +274,7 @@ impl Stage for Crate { self.out_path.display() ); let cargo_toml = self.out_path.join("Cargo.toml"); - let mut cargo_toml_contents = assets::CRATE_CARGO_TOML.parse::()?; + let mut cargo_toml_contents = assets::CRATE_CARGO_TOML.parse::()?; cargo_toml_contents["package"]["name"] = value(self.crate_name.clone()); cargo_toml_contents["dependencies"]["tinysearch"] = toml_edit::Item::Table(self.engine_version.clone()); @@ -301,6 +305,7 @@ struct Wasm { out_path: PathBuf, crate_path: DirOrTemp, optimize: bool, + release: bool, } impl Wasm { @@ -326,43 +331,159 @@ impl Stage for Wasm { out_path: ensure_exists(opt.out_path.clone())?, crate_path, optimize: opt.optimize, + release: opt.release, }) } fn build(self: &Wasm) -> Result<(), Error> { self.c.build().context("Failed generating crate")?; - println!("Compiling WASM module using wasm-pack"); + println!("Compiling WASM module using vanilla cargo build"); let crate_path = self.crate_path.path(); + let wasm_name = self.c.crate_name.replace('-', "_"); + + // Build with vanilla cargo run_output( - Command::new("wasm-pack") + Command::new("cargo") + .current_dir(&crate_path) .arg("build") - .arg(&crate_path) .arg("--target") - .arg("web") - .arg("--release") - .arg("--out-dir") - .arg(&self.out_path), + .arg("wasm32-unknown-unknown") + .arg("--release"), )?; - let wasm_name = self.c.crate_name.replace('-', "_"); + // Copy the WASM file to output directory + let wasm_file = format!("{}.wasm", 
&wasm_name); + let source_wasm = crate_path + .join("target/wasm32-unknown-unknown/release") + .join(&wasm_file); + let dest_wasm = self.out_path.join(&wasm_file); + fs::copy(&source_wasm, &dest_wasm).with_context(|| { + format!( + "Failed to copy {} to {}", + source_wasm.display(), + dest_wasm.display() + ) + })?; + + // Generate simple JS loader + let js_content = format!( + r#" +class TinySearchWasm {{ + constructor(wasmInstance) {{ + this.wasm = wasmInstance; + this.memory = wasmInstance.exports.memory; + this.searchFn = wasmInstance.exports.search; + this.freeFn = wasmInstance.exports.free_search_result; + }} + + // Convert JS string to WASM memory + stringToWasm(str) {{ + const bytes = new TextEncoder().encode(str + '\0'); + const ptr = this.wasm.exports.malloc ? this.wasm.exports.malloc(bytes.length) : this.allocString(bytes.length); + const mem = new Uint8Array(this.memory.buffer, ptr, bytes.length); + mem.set(bytes); + return ptr; + }} + + // Read string from WASM memory + wasmToString(ptr) {{ + if (ptr === 0) return null; + const mem = new Uint8Array(this.memory.buffer); + let end = ptr; + while (mem[end] !== 0) end++; + return new TextDecoder().decode(mem.subarray(ptr, end)); + }} + + // Simple string allocation fallback + allocString(len) {{ + // This is a simple fallback - WASM linear memory grows as needed + const pages = Math.ceil(len / 65536); + this.memory.grow(pages); + return this.memory.buffer.byteLength - len; + }} + + // Perform search + search(query, numResults = 5) {{ + const queryPtr = this.stringToWasm(query); + const resultPtr = this.searchFn(queryPtr, numResults); + + if (resultPtr === 0) {{ + return []; + }} + + const jsonStr = this.wasmToString(resultPtr); + this.freeFn(resultPtr); + + try {{ + return JSON.parse(jsonStr); + }} catch (e) {{ + console.error('Failed to parse search results:', e); + return []; + }} + }} +}} + +export async function init_tinysearch() {{ + try {{ + // Try streaming first (preferred) + const wasmModule = 
await WebAssembly.instantiateStreaming(fetch('./{wasm_file}')); + return new TinySearchWasm(wasmModule.instance); + }} catch (e) {{ + console.warn('Streaming failed, falling back to fetch + instantiate:', e.message); + // Fallback for servers with wrong MIME type + const response = await fetch('./{wasm_file}'); + const wasmBytes = await response.arrayBuffer(); + const wasmModule = await WebAssembly.instantiate(wasmBytes); + return new TinySearchWasm(wasmModule.instance); + }} +}} + +// Backward compatibility +export {{ TinySearchWasm as TinySearch }}; +"#, + wasm_file = wasm_file + ); + + let js_path = self.out_path.join(format!("{}.js", &wasm_name)); + if !self.release { + fs::write(&js_path, js_content) + .with_context(|| format!("Failed writing JS loader to {}", js_path.display()))?; + } + + // Optional optimization if self.optimize { - let wasm_file = format!("{}_bg.wasm", &wasm_name); - run_output( + if run_output( Command::new("wasm-opt") .current_dir(&self.out_path) .arg("-Oz") .arg("-o") .arg(&wasm_file) .arg(&wasm_file), - )?; + ) + .is_ok() + { + println!("Optimized WASM with wasm-opt"); + } else { + println!("wasm-opt not available, skipping optimization"); + } + } + + if !self.release { + let html_path = self.out_path.join("demo.html"); + fs::write( + &html_path, + assets::DEMO_HTML.replace("{WASM_NAME}", &wasm_name), + ) + .with_context(|| format!("Failed writing demo.html to {}", &html_path.display()))?; + println!("All done! 
WASM module at: {}", dest_wasm.display()); + println!("JS loader at: {}", js_path.display()); + println!("Demo at: {}", html_path.display()); + } else { + println!("Created production-ready WASM module"); + println!("See docs for usage instructions"); + println!("Path: {}", dest_wasm.display()); + println!("Size: {} bytes", dest_wasm.metadata()?.len()); } - let html_path = self.out_path.join("demo.html"); - fs::write( - &html_path, - assets::DEMO_HTML.replace("{WASM_NAME}", &wasm_name), - ) - .with_context(|| format!("Failed writing demo.html to {}", &html_path.display()))?; - println!("All done! Open the output folder with a web server to try the demo."); Ok(()) } } diff --git a/src/bin/utils/storage.rs b/src/bin/utils/storage.rs index 33249cd..de92182 100644 --- a/src/bin/utils/storage.rs +++ b/src/bin/utils/storage.rs @@ -40,7 +40,7 @@ fn tokenize(words: &str, stopwords: &HashSet) -> HashSet { } // Read all posts and generate Bloomfilters from them. -#[no_mangle] +#[unsafe(no_mangle)] pub fn generate_filters(posts: HashMap>) -> Result { // Create a dictionary of {"post name": "lowercase word set"}. 
split_posts = // {name: set(re.split("\W+", contents.lower())) for name, contents in @@ -59,32 +59,32 @@ pub fn generate_filters(posts: HashMap>) -> Result = tokenize(&post_id.0, &stopwords); - let content: Vec = if let Some(body) = body { - body.union(&title).cloned().collect() - } else { - title.into_iter().collect() - }; - let filter = HashProxy::from(&content); - filters.push((post_id, filter)); - } + let filters = split_posts + .into_iter() + .map(|(post_id, body)| { + // Also add title to filter + let title: HashSet = tokenize(&post_id.0, &stopwords); + let content: Vec = body.map_or_else( + || title.clone().into_iter().collect(), + |body| body.union(&title).cloned().collect(), + ); + let filter = HashProxy::from(&content); + (post_id, filter) + }) + .collect(); trace!("Done"); Ok(filters) } // prepares the files in the given directory to be consumed by the generator pub fn prepare_posts(posts: Posts) -> HashMap> { - let mut prepared: HashMap> = HashMap::new(); - for post in posts { - debug!("Analyzing {}", post.url); - prepared.insert((post.title, post.url, post.meta), post.body); - } - prepared + posts + .into_iter() + .inspect(|post| debug!("Analyzing {}", post.url)) + .map(|post| ((post.title, post.url, post.meta), post.body)) + .collect() } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 8d85e7d..43847fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,8 @@ +//! tinysearch - A tiny search engine for static websites +//! +//! This crate provides a fast, memory-efficient search engine that can be compiled +//! to WebAssembly for client-side search functionality on static websites. 
+ use bincode::Error as BincodeError; use serde::{Deserialize, Serialize}; use std::cmp::Reverse; @@ -5,15 +10,26 @@ use std::collections::hash_map::DefaultHasher; use std::convert::From; use xorf::{Filter as XorfFilter, HashProxy, Xor8}; +/// Title of a post type Title = String; +/// URL of a post type Url = String; +/// Optional metadata for a post type Meta = Option; + +/// Represents a post with its title, URL, and optional metadata pub type PostId = (Title, Url, Meta); + +/// A post with its associated Xor filter for fast lookups pub type PostFilter = (PostId, HashProxy); + +/// Collection of all post filters pub type Filters = Vec; +/// Storage container for serialized search index #[derive(Serialize, Deserialize)] pub struct Storage { + /// Vector of post filters for search functionality pub filters: Filters, } @@ -23,12 +39,14 @@ impl From for Storage { } } +/// Trait for scoring search terms against a filter pub trait Score { + /// Returns the number of search terms that match this filter fn score(&self, terms: &[String]) -> usize; } -// the score denotes the number of terms from the query that are contained in the -// current filter +/// Implementation of scoring for Xor filters +/// The score denotes the number of terms from the query that are contained in the current filter impl Score for HashProxy { fn score(&self, terms: &[String]) -> usize { terms.iter().filter(|term| self.contains(term)).count() @@ -36,23 +54,27 @@ impl Score for HashProxy { } impl Storage { + /// Serializes the storage to bytes using bincode pub fn to_bytes(&self) -> Result, BincodeError> { let encoded: Vec = bincode::serialize(&self)?; Ok(encoded) } + /// Deserializes storage from bytes using bincode pub fn from_bytes(bytes: &[u8]) -> Result { let decoded: Filters = bincode::deserialize(bytes)?; Ok(Storage { filters: decoded }) } } +/// Type alias for the filter used in search pub type Filter = HashProxy; +/// Weight multiplier for title matches vs body matches const TITLE_WEIGHT: 
usize = 3; -// Wrapper around filter score, that also scores the post title -// Post title score has a higher weight than post body +/// Calculates a combined score for a post based on title and body matches +/// Post title matches are weighted higher than body matches fn score(title: &str, search_terms: &[String], filter: &Filter) -> usize { let title_terms: Vec = tokenize(title); let title_score: usize = search_terms @@ -62,6 +84,7 @@ fn score(title: &str, search_terms: &[String], filter: &Filter) -> usize { TITLE_WEIGHT * title_score + filter.score(search_terms) } +/// Tokenizes a string into lowercase words, removing empty tokens fn tokenize(s: &str) -> Vec { s.to_lowercase() .split_whitespace() @@ -69,6 +92,16 @@ fn tokenize(s: &str) -> Vec { .map(String::from) .collect() } + +/// Performs a search query against the provided filters +/// +/// # Arguments +/// * `filters` - The search index containing all posts and their filters +/// * `query` - The search query string +/// * `num_results` - Maximum number of results to return +/// +/// # Returns +/// Vector of `PostId` references, sorted by relevance score (highest first) pub fn search(filters: &'_ Filters, query: String, num_results: usize) -> Vec<&'_ PostId> { let search_terms: Vec = tokenize(&query); let mut matches: Vec<(&PostId, usize)> = filters diff --git a/tests/integration_test.rs b/tests/integration_test.rs new file mode 100644 index 0000000..1247825 --- /dev/null +++ b/tests/integration_test.rs @@ -0,0 +1,111 @@ +use std::process::Command; +use tempfile::TempDir; + +#[test] +fn test_cli_version() { + let output = Command::new("cargo") + .args(["run", "--features=bin", "--", "--version"]) + .output() + .expect("Failed to execute command"); + + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(stdout.starts_with("tinysearch")); +} + +#[test] +fn test_cli_wasm_mode() { + // Check if wasm32-unknown-unknown target is available + let target_check = 
Command::new("rustup") + .args(["target", "list", "--installed"]) + .output() + .expect("Failed to check installed targets"); + + let installed_targets = String::from_utf8_lossy(&target_check.stdout); + if !installed_targets.contains("wasm32-unknown-unknown") { + panic!( + "wasm32-unknown-unknown target is not installed. Install it with: rustup target add wasm32-unknown-unknown" + ); + } + + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + + let output = Command::new("cargo") + .args([ + "run", + "--features=bin", + "--", + "-m", + "wasm", + "-p", + temp_dir.path().to_str().unwrap(), + "fixtures/index.json", + ]) + .output() + .expect("Failed to execute command"); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + eprintln!("WASM build failed. Stdout: {}", stdout); + eprintln!("Stderr: {}", stderr); + panic!("WASM build failed unexpectedly"); + } + + // Verify that WASM and JS files were created + let wasm_files: Vec<_> = std::fs::read_dir(&temp_dir) + .expect("Failed to read output directory") + .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .path() + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext == "wasm" || ext == "js") + }) + .collect(); + + assert!(!wasm_files.is_empty(), "No WASM/JS files were generated"); + + // Specifically check for both .wasm and .js files + let has_wasm = wasm_files + .iter() + .any(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("wasm")); + let has_js = wasm_files + .iter() + .any(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("js")); + + assert!(has_wasm, "No .wasm file was generated"); + assert!(has_js, "No .js file was generated"); +} + +#[test] +fn test_cli_storage_mode() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + + let output = Command::new("cargo") + .args([ + "run", + "--features=bin", + 
"--", + "-m", + "storage", + "-p", + temp_dir.path().to_str().unwrap(), + "fixtures/index.json", + ]) + .output() + .expect("Failed to execute command"); + + // Storage mode should work with the provided fixtures + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Command failed: {}", stderr); + } + + assert!(output.status.success()); + + // Check that storage file was created + let storage_path = temp_dir.path().join("storage"); + assert!(storage_path.exists(), "Storage file should be created"); +} From 6580039fa917536108e97aad7aab0810ff0e358a Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 14 Aug 2025 10:38:54 +0200 Subject: [PATCH 46/58] Clean up Docker image --- Dockerfile | 25 ++++++++----------------- Makefile | 13 +++++++++++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4b58fd0..7b9565b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,5 @@ # Docker tinysearch with deps # - binaryen -# - wasm-pack -# - terser ARG TINY_REPO=https://github.com/tinysearch/tinysearch ARG TINY_BRANCH=master @@ -22,22 +20,13 @@ RUN apk add --update --no-cache --virtual \ gcc \ curl \ git \ - npm \ - gcc \ ca-certificates \ libc6-compat \ binaryen && \ - ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1 && \ - npm install terser -g - -# Verify the installation -RUN terser --version + ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1 -# Install wasm-pack -RUN curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - -# Verify the installation -RUN wasm-pack --version && which wasm-pack +# Install WASM target +RUN rustup target add wasm32-unknown-unknown # Clone the repo and build the binary RUN git clone --branch "$TINY_BRANCH" "$TINY_REPO" tinysearch && \ @@ -53,9 +42,11 @@ WORKDIR /app RUN apk add --update --no-cache libc6-compat musl-dev binaryen openssl-dev && \ ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld64.so.1 -# Copy the build binaries and tinysearch directory 
-COPY --from=builder /usr/local/bin/ /usr/local/bin/ -COPY --from=builder /usr/local/cargo/bin/ /usr/local/bin/ +# Install WASM target for runtime compilation +RUN rustup target add wasm32-unknown-unknown + +# Copy the tinysearch binary and source directory +COPY --from=builder /usr/local/cargo/bin/tinysearch /usr/local/bin/ # Copy tinysearch build directory to be used as the engine (see `--engine-version` option below) # This is done because we want to use the same image for building and running tinysearch # and not depend on crates.io for the engine diff --git a/Makefile b/Makefile index 766ae4f..943b9b9 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ SHELL := /bin/bash .DEFAULT_GOAL := help # PHONY targets -.PHONY: help clean build build-release build-docker install test test-unit test-integration +.PHONY: help clean build build-release build-docker docker install test test-unit test-integration .PHONY: lint fmt check audit run example demo deps update .PHONY: ci-check ci-test ci-build ci-lint ci-fmt ci-audit @@ -17,7 +17,7 @@ help: ## Display this help message clean: ## Clean up build artifacts cargo clean - rm -rf wasm_output target/criterion demo + rm -rf wasm_output target/criterion demo docker_output rm -rf examples/*/dist find . -name "*.wasm" -type f -delete find . -name "*.js" -type f -path "*/pkg/*" -delete @@ -31,6 +31,15 @@ build-release: ## Build the project in release mode build-docker: ## Build Docker image docker build --progress=plain -t tinysearch/cli . +docker: ## Build and run Docker container with sample data + @echo "🐳 Building Docker image..." + @docker build -t tinysearch/cli . + @echo "🚀 Running Docker container with sample data..." 
+ @mkdir -p docker_output + @docker run --rm -v $(PWD)/docker_output:/app/output -v $(PWD)/fixtures:/app/fixtures tinysearch/cli -m wasm -p /app/output /app/fixtures/index.json + @echo "📂 Output files created in docker_output/" + @ls -la docker_output/ + install: ## Install tinysearch locally cargo install --force --path . --features=bin From ee1b86cf9cffb75ddc11123ac93d87230c0857a9 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 14 Aug 2025 10:49:39 +0200 Subject: [PATCH 47/58] Bump version --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d53610f..29e769d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -325,7 +325,7 @@ dependencies = [ [[package]] name = "tinysearch" -version = "0.8.2" +version = "0.9.0" dependencies = [ "anyhow", "argh", diff --git a/Cargo.toml b/Cargo.toml index c747c64..dd7f670 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tinysearch" authors = ["Matthias Endler "] -version = "0.8.2" +version = "0.9.0" edition = "2024" rust-version = "1.85" resolver = "2" From 23784a87d504bf3d4d533f7038b295d5b4f8f1bb Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 14 Aug 2025 10:52:36 +0200 Subject: [PATCH 48/58] Bump version --- assets/crate/Cargo_orig.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/crate/Cargo_orig.toml b/assets/crate/Cargo_orig.toml index a58692e..f67e7e4 100644 --- a/assets/crate/Cargo_orig.toml +++ b/assets/crate/Cargo_orig.toml @@ -3,7 +3,7 @@ [package] name = "THIS_VALUE_SHOULD_BE_FILLED" authors = ["Matthias Endler "] -version = "0.8.2" +version = "0.9.0" edition = "2024" rust-version = "1.85" description = "A tiny search engine for static websites" From 23299ca885a95d7a23146d3c31c17a099abfe6c9 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 18 Aug 2025 14:41:51 +0200 Subject: [PATCH 49/58] Exclude `pkg` from releases --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml 
b/Cargo.toml index dd7f670..6e0ed5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ license = "Apache-2.0/MIT" documentation = "https://github.com/mre/tinysearch/blob/master/README.md" homepage = "https://github.com/mre/tinysearch" repository = "https://github.com/mre/tinysearch" +exclude = ["pkg/**/*"] readme = "README.md" include = ["src", "assets", "README.md"] From 3ef003f086b6ed715a88758c32bfdcd2b3352200 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 18 Aug 2025 14:43:47 +0200 Subject: [PATCH 50/58] Reorder entries in `Cargo.toml` --- Cargo.toml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6e0ed5f..ccae25d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,18 +1,16 @@ [package] name = "tinysearch" -authors = ["Matthias Endler "] version = "0.9.0" +authors = ["Matthias Endler "] edition = "2024" rust-version = "1.85" resolver = "2" description = "A tiny search engine for static websites" +readme = "README.md" license = "Apache-2.0/MIT" -documentation = "https://github.com/mre/tinysearch/blob/master/README.md" homepage = "https://github.com/mre/tinysearch" repository = "https://github.com/mre/tinysearch" -exclude = ["pkg/**/*"] - -readme = "README.md" +documentation = "https://github.com/mre/tinysearch/blob/master/README.md" include = ["src", "assets", "README.md"] [lib] From 75e02ba501a0c12a6a0c17b1ff2d8c6acfcaec39 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 18 Aug 2025 15:25:24 +0200 Subject: [PATCH 51/58] Update Zola howto --- howto/zola.md | 122 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 90 insertions(+), 32 deletions(-) diff --git a/howto/zola.md b/howto/zola.md index d5c8dbb..79bd112 100644 --- a/howto/zola.md +++ b/howto/zola.md @@ -1,52 +1,110 @@ -# Building the search index with Zola +# Building a Search Index for tinysearch with Zola -1. Create a template, which iterates over all pages and creates our JSON structure. 
+This guide shows how to create a JSON search index for tinysearch using Zola's template engine (Tera). The process involves creating templates that extract content from all your pages and format it as JSON for tinysearch to process. -`macros/create_data.html`: +## Overview -```liquid -{%- macro from_section(section) -%} -{%- set section = get_section(path=section) -%} +We'll create: +1. A template that outputs JSON +2. A content file that generates the search index + +## Step 1: Create the Search Index Template + +Create `templates/tinysearch_index.html`: + +```tera +{% set section = get_section(path="_index.md") %} +[ {%- for post in section.pages -%} -{%- if not post.draft -%} +{% if not post.draft %} { "title": {{ post.title | striptags | json_encode | safe }}, "url": {{ post.permalink | json_encode | safe }}, -"body": {{ post.content | striptags | json_encode | safe }} -} -{%- if not loop.last -%},{%- endif %} -{%- endif -%} +"body": "{{ post.content | striptags | replace(from="{", to=" ") | replace(from="}", to=" ") | replace(from='"', to=" ") | replace(from="'", to="") | replace(from="\", to="") | escape }}" +}{% if not loop.last %},{% endif %} +{% endif %} {%- endfor -%} -{%- if section.subsections -%} -, -{%- for subsection in section.subsections -%} -{{ self::from_section(section=subsection) }} -{%- endfor -%} -{%- endif -%} -{%- endmacro from_section -%} +] ``` -`templates/json.html`: - -```liquid -{%- import "macros/create_data.html" as create_data -%} -[{{ create_data::from_section(section="_index.md") }}] -``` +**What this template does:** +- Gets the root section (`_index.md`) via `get_section` +- Iterates through the pages that belong directly to that section (pages inside subsections are not included) +- Skips draft pages +- Extracts the title, URL, and content for each page +- Uses special filtering for the body content to handle JSON escaping +- Outputs a properly formatted JSON array -2. Create a static page using the template.
+## Step 2: Create the Search Index Page -`content/static/json.md`: +Create `content/tinysearch.md`: -``` +```toml +++ -path = "json" -template = "json.html" +title = "Search Index" +path = "tinysearch.json" +template = "tinysearch_index.html" +date = 2025-01-01 +++ ``` -After running `zola build`, the output JSON file should be in `public/json/index.html`. -You can then call tinysearch on the index to create your WASM: +**Important notes:** +- The `path` parameter determines the output URL (`/tinysearch.json`) +- The `template` parameter specifies which template to use +- The file extension in `path` doesn't affect the actual content type + +## Step 3: Build and Process + +1. **Build your Zola site:** + ```bash + zola build + ``` + +2. **Find the generated JSON:** + The search index will be at `public/tinysearch.json/index.html` + +3. **Run tinysearch:** + ```bash + tinysearch --optimize --path static public/tinysearch.json/index.html + ``` + +## Customization Options +### Including Specific Sections Only + +To limit indexing to a specific section, change the path passed to `get_section` at the top of the template: + +```tera +{% set section = get_section(path="blog/_index.md") %} ``` -tinysearch --optimize --path static public/json/index.html + +### Adding Metadata + +You can extend the object emitted for each post to include additional metadata: + +```tera +{ + "title": {{ post.title | striptags | json_encode | safe }}, + "url": {{ post.permalink | json_encode | safe }}, + "body": {{ post.content | striptags | json_encode | safe }}, + "date": {{ post.date | json_encode | safe }}, + "tags": {{ post.taxonomies.tags | json_encode | safe }} +} ``` + +## Troubleshooting + +### Empty Output +- Check that your sections contain non-draft pages +- Verify the section path passed to `get_section` matches your content structure + +### JSON Syntax Errors +- Ensure proper comma placement between items +- Use `json_encode` filter for all dynamic content +- Test the generated JSON with a validator + +### Build Errors +- Make sure the
`tinysearch_index.html` file is in the `templates/` directory +- Check that all template syntax is correct (Tera uses `{%` and `{{` syntax) + +This setup will create a comprehensive search index that tinysearch can process into an efficient WebAssembly search module for your Zola site. From 3621d985b6acf939ff3e0fb3db199f8fcad9db8a Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 18 Aug 2025 15:28:13 +0200 Subject: [PATCH 52/58] Update example JSON index --- fixtures/index.json | 238 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 34 deletions(-) diff --git a/fixtures/index.json b/fixtures/index.json index e2bfcf9..67407dd 100644 --- a/fixtures/index.json +++ b/fixtures/index.json @@ -1,167 +1,337 @@ [ + { + "title": "How To Review Code", + "url": "https://endler.dev/2025/how-to-review-code/", + "body": "I’ve been reviewing other people’s code for a while now, more than two decades to be precise. Nowadays, I spend around 50-70% of my time reviewing code in some form or another. It’s what I get paid to do, alongside systems design. Over time, I learned a thing or two about how to review code effectively. I focus on different things now than when I started. Think About The Big Picture Bad reviews are narrow in scope. They focus on syntax, style, and minor issues instead of maintainability and extensibility. Good reviews look at not only the changes, but also what problems the changes solve, what future issues might arise, and how a change fits into the overall design of the system. I like to look at the lines that weren’t changed. They often tell the true story. For example, often people forget to update a related section of the codebase or the docs. This can lead to bugs, confusion, breaking changes, or security issues. Be thorough and look at all call-sites of the new code. Have they been correctly updated? Are the tests still testing the right thing? Are the changes in the right place?
Here’s a cheat sheet of questions I ask myself when reviewing code: How does this code fit into the rest of the system? What’s its interaction with other parts of the codebase? How does it affect the overall architecture? Does it impact future planned work? These questions have more to do with systems design than with the changes themselves. Don’t neglect the bigger picture because systems become brittle if you accept bad changes. Code isn’t written in isolation. The role of more experienced developers is to reduce operational friction and handle risk management for the project. The documentation, the tests, and the data types are equally as important as the code itself. Always keep an eye out for better abstractions as the code evolves. Naming Is Everything I spend a big chunk of my time thinking about good names when reviewing code. Naming things is hard, which is why it’s so important to get it right. Often, it’s the most important part of a code review. It’s also the most subjective part, which makes it tedious because it’s hard to distinguish between nitpicking and important naming decisions. Names encapsulate concepts and serve as “building blocks” in your code. Bad names are the code smell that hint at problems running deep. They increase cognitive overhead by one or more orders of magnitude. For example, say we have a struct that represents a player’s stats in a game: struct Player username: String, score: i32, level: i32, I often see code like this: &#x2F;&#x2F; Bad: using temporary&#x2F;arbitrary names creates confusion fn update_player_stats(player: Player, bonus_points: i32, level_up: bool) -&amp;gt; Player let usr = player.username.trim().to_lowercase(); let updated_score = player.score + bonus_points; let l = if level_up player.level + 1 else player.level ; let l2 = if l &amp;gt; 100 100 else l ; Player username: usr, score: updated_score, level: l2, This code is hard to read and understand. What is usr, updated_score, and l2? 
The purpose is not conveyed clearly. This builds up cognitive load and make it harder to follow the logic. That’s why I always think of the most fitting names for variables, even if it feels like I’m being pedantic. &#x2F;&#x2F; Good: meaningful names that describe the transformation at each step fn update_player_stats(player: Player, bonus_points: i32, level_up: bool) -&amp;gt; Player &#x2F;&#x2F; Each variable name describes what the value represents let username = player.username.trim().to_lowercase(); let score = player.score + bonus_points; &#x2F;&#x2F; Use shadowed variables to clarify intent let level = if level_up player.level + 1 else player.level ; let level = if level &amp;gt; 100 100 else level ; &#x2F;&#x2F; If done correctly, the final variable names &#x2F;&#x2F; often match the struct&amp;#39;s field names Player username, score, level, Good names become even more critical in larger codebases where values are declared far away from where they’re used and where many developers have to have a shared understanding of the problem domain. Don’t Be Afraid To Say “No” I have to decline changes all the time and it’s never easy. After all someone put in a lot of effort and they want to see their work accepted. Avoid sugarcoating your decision or trying to be nice. Be objective, explain your reasoning and provide better alternatives. Don’t dwell on it, but focus on the next steps. It’s better to say no than to accept something that isn’t right and will cause problems down the road. In the future it will get even harder to deny a change once you’ve set a precedent. That’s the purpose of the review process: there is no guarantee that the code will be accepted. In open source, many people will contribute code that doesn’t meet your standards. There needs to be someone who says “no” and this is a very unpopular job (ask any open source maintainer). 
However, great projects need gatekeepers because the alternative is subpar code and eventually unmaintainable projects. At times, people will say “let’s just merge this and fix it later.” I think that’s a slippery slope. It can lead to tech debt and additional work later on. It’s hard to stand your ground, but it’s important to do so. If you see something that isn’t right, speak up. When it gets hard, remember that you’re not rejecting the person, you’re rejecting the code. Remind people that you appreciate their effort and that you want to help them improve. Even though you’ll develop an intuition for what to focus on in reviews, you should still back it up with facts. If you find yourself saying “no” to the same thing over and over again, consider writing a style guide or a set of guidelines for your team. Be gracious but decisive; it’s just code. Code Review Is Communication Code reviews aren’t just about code; people matter too. Building a good relationship with your coworkers is important. I make it a point to do the first couple of reviews together in a pair programming session if possible. This way, you can learn from each other’s communication style. Building trust and getting to know each other works well this way. You should repeat that process later if you notice a communication breakdown or misunderstanding. Use Multiple Iterations Of Reviews “Can you take a quick look at this PR? I want to merge it today.” There often is an expectation that code reviews are a one-time thing. That’s not how it works. Instead, code reviews are an iterative process. Multiple iterations should be expected to get the code right. In my first iteration, I focus on the big picture and the overall design. Once I’m done with that, I go into the details. The goal shouldn’t be to merge as quickly as possible, but to accept code that is of high quality. Otherwise, what’s the point of a code review in the first place? That’s a mindset shift that’s important to make. 
Reviews aren’t exclusively about pointing out flaws, they’re also about creating a shared understanding of the code within the team. I often learn the most about writing better code by reviewing other people’s code. I’ve also gotten excellent feedback on my own code from excellent engineers. These are invaluable “aha moments” that help you grow as a developer. Experts spent their valuable time reviewing my code, and I learned a lot from it. I think everybody should experience that once in their career. Don’t Be A Jerk From time to time, you’ll disagree with the author. Being respectful and constructive is important. Avoid personal attacks or condescending language. Don’t say “this is wrong.” Instead, say “I would do it this way.” If people are hesitant, ask a few questions to understand their reasoning. “Will this break existing workflows if we do it this way?” “Which alternatives have you considered?” “What happens if you call this function with an empty array?” “If I don’t set this value, what will be the error message presented to the user?” These “Socratic questions”1 help the author think about their decisions and can lead to better designs. People should enjoy receiving your feedback. If not, revisit your review style. Only add comments that you yourself would be happy to receive. From time to time, I like to add positive comments like “I like this” or “this is a great idea.” Keeping the author motivated and showing that you appreciate their work goes a long way. If Possible, Try To Run The Code It’s easy to miss subtle details when you look at code for too long. Having a local copy of the code that I can play with helps me a lot. I try to run the code, the tests, and the linters if I can. Checking out the branch, moving things around, breaking things, and trying to understand how it works is part of my review process. User-facing changes like UI changes or error messages are often easier to spot when you run the code and try to break it. 
After that, I revert the changes and, if needed, write down my findings in a comment. Better understanding can come from this approach. Be Upfront About Your Availability Code reviews are often a bottleneck in the development process, because they can’t be fully automated: there’s a human in the loop who has to look at the code and provide feedback. But if you wait for your colleagues to review your code, that can lead to frustration. Avoid being that person. Sometimes you won’t have time to review code and that is okay. If you can’t review the code in a reasonable time, let the author know. I’m still working on this, but I try to be more proactive about my availability and set clear expectations. Never Stop Learning Code reviews are my favorite way to learn new things. I learn new techniques, patterns, new libraries, but most importantly, how other people approach problems. I try to learn one new thing with each review. It’s not wasted time, if it helps the team improve and grow as a whole. Don’t Be Nitpicky Formatters exist for a reason: leave whitespace and formatting to the tools. Save your energy for issues that truly matter. Focus on logic, design, maintainability, and correctness. Avoid subjective preferences that don’t impact code quality. Ask yourself: Does this affect functionality or would it confuse future developers? If not, let it go. Focus On The Why, Not The How When reviewing code, focus on the reasoning behind the changes. This has a much better chance of success than pointing out flaws without any reasoning. Consider the following two code review comments. The first one is unhelpful and dismissive. The second suggests an alternative, links to the documentation, and explains why the change could lead to problems down the road. Which one would you prefer to receive? I realize that this requires more time and effort, but it’s worth it! Most of the time, the author will appreciate it and avoid making the same mistake in the future. 
There is a compound effect from helpful reviews over time. Don’t Be Afraid To Ask Stupid Questions Asking is better than assuming. If you don’t understand something, ask the author to explain it. Chances are, you’re not the only one who doesn’t get it. Often, the author will be happy to explain their reasoning. Better understanding of the code and the system as a whole can result from this. It can also help the author see things from a different perspective. Perhaps they’ll learn that their assumptions were wrong or that the system isn’t self-explanatory. Perhaps there’s missing documentation? Asking great questions is a superpower. Ask For Feedback On Your Reviewing Style From time to time, ask the author for feedback on your feedback: Have you been too harsh&#x2F;nitpicky&#x2F;slow&#x2F;sloppy? Did you point out the right things? Did your feedback help them? Do they have suggestions for improvement? Basically, you ask them to review your review process, heh. Learning how to review code is a skill that needs constant practice and refinement. Good luck finding your own style. Thanks for pointing out that term to me, Lucca! ↩ " + }, + { + "title": "Repeat Yourself", + "url": "https://endler.dev/2025/repeat-yourself/", + "body": "One of the most repeated pieces of advice throughout my career in software has been “don’t repeat yourself,” also known as the DRY principle. For the longest time, I took that at face value, never questioning its validity. That was until I saw actual experts write code: they copy code all the time1. I realized that repeating yourself has a few great benefits. Why People Love DRY The common wisdom is that if you repeat yourself, you have to fix the same bug in multiple places, but if you have a shared abstraction, you only have to fix it once. Another reason why we avoid repetition is that it makes us feel clever. “Look, I know all of these smart ways to avoid repetition! 
I know how to use interfaces, generics, higher-order functions, and inheritance!” Both reasons are misguided. There are many benefits of repeating yourself that might get us closer to our goals in the long run. Keeping Up The Momentum When you’re writing code, you want to keep the momentum going to get into a flow state. If you constantly pause to design the perfect abstraction, it’s easy to lose momentum. Instead, if you allow yourself to copy-paste code, you keep your train of thought going and work on the problem at hand. You don’t introduce another problem of trying to find the right abstraction at the same time. It’s often easier to copy existing code and modify it until it becomes too much of a burden, at which point you can go and refactor it. I would argue that “writing mode” and “refactoring mode” are two different modes of programming. During writing mode, you want to focus on getting the idea down and stop your inner critic, which keeps telling you that your code sucks. During refactoring mode, you take the opposite role: that of the critic. You look for ways to improve the code by finding the right abstractions, removing duplication, and improving readability. Keep these two modes separate. Don’t try to do both at the same time.2 Finding The Right Abstraction Is Hard When you start to write code, you don’t know the right abstraction just yet. But if you copy code, the right abstraction reveals itself; it’s too tedious to copy the same code over and over again, at which point you start to look for ways to abstract it away. For me, this typically happens after the first copy of the same code, but I try to resist the urge until the 2nd or 3rd copy. If you start too early, you might end up with a bad abstraction that doesn’t fit the problem. You know it’s wrong because it feels clunky. 
Some typical symptoms include: Generic names that don’t convey intent, e.g., render_pdf_file instead of generate_invoice Difficult to understand without additional context The abstraction is only used in one or two places Tight coupling to implementation details It’s Hard To Get Rid Of Wrong Abstractions We easily settle for the first abstraction that comes to mind, but most often, it’s not the right one. And removing the wrong abstraction is hard work, because now the data flow depends on it. We also tend to fall in love with our own abstractions because they took time and effort to create. This makes us reluctant to discard them even when they no longer fit the problem—it’s a sunk cost fallacy. It gets worse when other programmers start to depend on it, too. Then you have to be careful about changing it, because it might break other parts of the codebase. Once you introduce an abstraction, you have to work with it for a long time, sometimes forever. If you had a copy of the code instead, you could just change it in one place without worrying about breaking anything else. Duplication is far cheaper than the wrong abstraction —Sandi Metz, The Wrong Abstraction Better to wait until the last moment to settle on the abstraction, when you have a solid understanding of the problem space.3 The Mental Overhead of Abstractions Abstraction reduces code duplication, but it comes at a cost. Abstractions can make code harder to read, understand, and maintain because you have to jump between multiple levels of indirection to understand what the code does. The abstraction might live in different files, modules, or libraries. The cost of traversing these layers is high. An expert programmer might be able to keep a few levels of abstraction in their head, but we all have a limited context window (which depends on familiarity with the codebase). When you copy code, you can keep all the logic in one place. You can just read the whole thing and understand what it does. 
Resist The Urge Of Premature Abstraction Sometimes, code looks similar but serves different purposes. For example, consider two pieces of code that calculate a sum by iterating over a collection of items. total = 0 for item in shopping_cart: total += item.price * item.quantity And elsewhere in the code, we have total = 0 for item in package_items: total += item.weight * item.rate In both cases, we iterate over a collection and calculate a total. You might be tempted to introduce a helper function, but the two calculations are very different. After a few iterations, these two pieces of code might evolve in different directions: def calculate_total_price(shopping_cart): if not shopping_cart: raise ValueError(&amp;quot;Shopping cart cannot be empty&amp;quot;) total = 0.0 for item in shopping_cart: # Round for financial precision total += round(item.price * item.quantity, 2) return total In contrast, the shipping cost calculation might look like this: def calculate_shipping_cost(package_items, destination_zone): # Use higher of actual weight vs dimensional weight total_weight = sum(item.weight for item in package_items) total_volume = sum(item.length * item.width * item.height for item in package_items) dimensional_weight = total_volume &#x2F; 5000 # FedEx formula billable_weight = max(total_weight, dimensional_weight) return billable_weight * shipping_rates[destination_zone] Had we applied “don’t repeat yourself” too early, we would have lost the context and specific requirements of each calculation. DRY Can Introduce Complexity The DRY principle is misinterpreted as a blanket rule to avoid any duplication at all costs, which can lead to complexity. When you try to avoid repetition by introducing abstractions, you have to deal with all the edge cases in a place far away from the actual business logic. You end up adding redundant checks and conditions to the abstraction, just to make sure it works in all cases. 
Later on, you might forget the reasoning behind those checks, but you keep them around “just in case” because you don’t want to break any callers. The result is dead code that adds complexity to the codebase; all because you wanted to avoid repeating yourself. The common wisdom is that if you repeat yourself, you have to fix the same bug in multiple places. But the assumption is that the bug exists in all copies. In reality, each copy might have evolved in different ways, and the bug might only exist in one of them. When you create a shared abstraction, a bug in that abstraction breaks every caller, breaking multiple features at once. With duplicated code, a bug is isolated to just one specific use case. Clean Up Afterwards Knowing that you didn’t break anything in a shared abstraction is much harder than checking a single copy of the code. Of course, if you have a lot of copies, there is a risk of forgetting to fix all of them. The key to making this work is to clean up afterwards. This can happen before you commit the code or during a code review. At this stage, you can look at the code you copied and see if it makes sense to keep it as is or if you can see the right abstraction. I try to refactor code once I have a better understanding of the problem, but not earlier. A trick to undo a bad abstraction is to inline the code back into the places where it was used. For a while, you end up “repeating yourself” again in the codebase, but that’s okay. Rethink the problem based on the new information you have. Often you’ll find a better abstraction that fits the problem better. When the abstraction is wrong, the fastest way forward is back. —Sandi Metz, The Wrong Abstraction tl;dr It’s fine to look for the right abstraction, but don’t obsess over it. Don’t be afraid to copy code when it helps you keep momentum and find the right abstraction. 
It bears repeating: “Repeat yourself.” For some examples, see Ferris working on Rustendo64 or tokiospliff working on a C++ game engine. ↩ This is also how I write prose: I first write a draft and block my inner critic, and then I play the role of the editor&#x2F;critic and “refactor” the text. This way, I get the best of both worlds: a quick feedback loop which doesn’t block my creativity, and a final product which is more polished and well-structured. Of course, I did not invent this approach. I recommend reading “Shitty first drafts” from Anne Lamott’s book Bird by Bird: Instructions on Writing and Life if you want to learn more about this technique. ↩ This is similar to the OODA loop concept, which stands for “Observe, Orient, Decide, Act.” It was developed by military strategist John Boyd. Fighter pilots use it to wait until the last responsible moment to decide on a course of action, which allows them to make the best decision based on the current situation and available information. ↩ " + }, + { + "title": "Watching Millionaires", + "url": "https://endler.dev/2025/watching-millionaires/", + "body": "I watched the Champions League final the other day when it struck me: I’m basically watching millionaires all the time. The players are millionaires, the coaches are millionaires, the club owners are millionaires. It’s surreal. This week I watched John Wick Ballerina and, again, there’s Keanu Reeves, who is a millionaire, and Ana de Armas, who is as well. Yesterday I heard about Trump and Musk fighting. They are not millionaires, they are billionaires! As I’m writing this, I’m watching the Rock am Ring live stream, a music festival in Germany. Weezer is playing. These guys are all millionaires. I don’t know what to make of it. It’s a strange realization, but one that feels worth sharing. I could go down the road of how this fixation on elites distracts us from the people nearby, but that’s not quite it. What interests me more is how normalized this has become. 
Maybe it’s just the power law in action: a few rise to the top, and we amplify them by watching. But most people in every field aren’t millionaires. We just don’t see them. You’re on a tiny blog by a tiny man and if you made it this far, I appreciate you. It looks as if you care about the little stories as well. If you’re anything like me, you’re not only enjoying the little stories, you’re actively seeking them out – but there’s so few of it nowadays. Yes, there are still places where people share their stories, but you need to know where to look. If anything, we all should share more. Write about the little things, the everyday moments, the people you meet, the things you care about. Don’t live anybody else’s life! Rivers Cuomo, Weezer’s lead singer, once wrote: My motivation is much different now than it was then: then I was terribly discontent and dreaming of being a classical composer, a writer, or basically anything that I wasn’t; now I just want to enjoy my life and do the responsible thing—graduate. That’s from his Letter For Readmission To Harvard (2005). Nobody forced him to go back to Harvard after so many years. He was a freaking millionaire rock star by then. And yet, he did. He stopped pretending and started living. We don’t have to keep watching other people’s lives. Live your own. " + }, + { + "title": "Paolo the Plumber", + "url": "https://endler.dev/2025/paolo/", + "body": "Paolo was a plumber. People knew him as a reliable and thorough craftsman. He fixed the pipes in his small town and made a good living doing so. One day, his friend Mario told him that he’d bought a plumbing machine. Paolo was intrigued and asked how it worked. “It’s magical!” said Mario. “I show it what’s broken, and it fixes the problem in no time!” Paolo asked if he could watch the machine work. The next day, Paolo and Mario took the machine to a house with a broken pipe. Paolo watched as Mario positioned the machine by the pipe. 
“Beep boop,” and the machine started working, and quickly. Paolo noticed the machine turned the wrench back and forth instead of applying steady pressure - something he could adapt for his own work. Within minutes, the pipe was fixed. “Soon no one will need plumbers anymore,” said Mario. “I can already do the work of ten plumbers with this machine!” That night, Paolo couldn’t sleep. He thought about his job and how it might change. He loved being a plumber and helping people. But what if machines really took over? Within a few weeks, Paolo’s phone stopped ringing. People were calling Mario instead because he did quicker, cheaper work. Some of Paolo’s old customers told him he was “old-fashioned” and “out of touch.” In the past, none of his customers had ever complained about his work. He always took time to do things right. He would check every joint, seal every pipe, and make sure everything was perfect before leaving. Sometimes he noticed other problems that needed fixing and he would offer to fix those too. Then one day, he got a call from an old customer. It was an emergency. The pipes in the restaurant were leaking and they needed help fast. Paolo rushed over and found a mess. He got to work and fixed the problem. “We just got it fixed the other day!” When Paolo asked who did the work, the owner said it was Mario. From that day on, more people called Paolo. They all had problems after working with Mario and the machine. Paolo kept finding the same mistakes: pipes not properly sealed, joints not aligned correctly, leaks temporarily fixed with instant glue. Sometimes the machine would add extra parts: pipes that ended nowhere, valves that didn’t connect to anything. Paolo recognized these as signs of the machine at work. Paolo called Mario and told him what he’d found. Mario knew about the issues: “I told it to fix it, but it didn’t work right. 
Even when I asked multiple times and was very polite.” And worse: “One time I looked away for a moment and the machine started remodeling the bathroom! It added a new sink that wasn’t there before.” Paolo asked why he didn’t just fix it himself. “I can’t,” Mario said. “I don’t know how to do it without the machine.” Mario had been a reputable plumber before he got the machine. Now he was relying on a machine that didn’t always work. Worse, Mario didn’t own the machine but rented it from a company far away. The rent was cheap in the beginning, but now it was getting more expensive. Paolo realized that Mario wasn’t the only one. Many plumbers were using machines now, and new plumbers were learning machines instead of tools. It wasn’t just plumbers—electricians, carpenters, other tradespeople were all relying on machines. The machines caused problems, but the company promised they would fix everything and get better with time. They kept updating the machines and gave them fancy names, but the problems remained. Paolo just kept working. He fixed what the machines broke. His customers called him back for more work. Soon his phone was ringing like before. A while later, a salesperson came to town with a new machine. Paolo heard Mario talking to him at the coffee shop. " + }, + { + "title": "Reinvent the Wheel", + "url": "https://endler.dev/2025/reinvent-the-wheel/", + "body": "One of the most harmful pieces of advice is to not reinvent the wheel. It usually comes from a good place, but is typically given by two groups of people: those who tried to invent a wheel themselves and know how hard it is those who never tried to invent a wheel and blindly follow the advice Either way, both positions lead to a climate where curiosity and exploration gets discouraged. I’m glad that some people didn’t follow that advice; we owe them many of the conveniences of modern life. 
Even on a surface level, the advice is bad: We have much better wheels today than 4500–3300 BCE when the first wheel was invented. It was also crucially important that wheels got reinvented throughout civilizations and cultures. Note: When I say “wheel” throughout this post, please replace it with whatever tool, protocol, service, technology, or other invention you’re personally interested in. Inventing Wheels Is Learning “What I cannot create, I do not understand” – Richard Feynman, Physicist and Nobel Prize Winner To really understand something on a fundamental level, you have to be able to implement a toy version first. It doesn’t matter if it’s any good; you can throw it away later. In Computer Science, for example, there are many concepts that are commonly assumed to be beyond the abilities of mere mortals: protocols, cryptography, and web servers come to mind. More people should know how these things work. And therefore I think people should not be afraid to recreate them. Everything Is A Rabbit Hole Too often, fundamental things are taken for granted. For example strings or paths are super complicated concepts in programming. It’s a great exercise to implement a string or a path library yourself if you’re interested in how they work. Even if nobody ends up using your work, I bet you’ll learn a lot. For example: There is an infinite complexity in everyday things. Building something that even a single other person finds useful is a humbling experience. Humans like you created these abstractions. They are not perfect and you can make different tradeoffs in your own design. On the last point, everything is a tradeoff and there are dozens, sometimes hundreds of footguns with every toy problem. Along the way, you will have to make decisions about correctness, simplicity, functionality, scalability, performance, resource usage, portability, and so on. Your solution can be great in some of these things, but not all of them and not for all users. 
That also implies that existing solutions have flaws and might not be designed to solve your particular problem; no matter how well-established the solution is. Going down rabbit holes is fun in its own way, but there is one other benefit: It is one of the few ways to level up as an engineer… but only if you don’t give up before you end up with a working version of what you tried to explore. If you jump between projects too often, you will learn nothing. Reasons for Reinventing the Wheel There are great reasons to reinvent the wheel: Build a better wheel (for some definition of better) Learn how wheels are made Teach others about wheels Learn about the inventors of wheels Be able to change wheels or fix them when they break Learn the tools needed to make wheels along the way Learn a tiny slice of what it means to build a larger system (such as a vehicle) Help someone in need of a very special wheel. Maybe for a wheelchair? Who knows? The wheel you come up with might not be the best use for a car, but maybe for a… skateboard or a bike? Or you fail building a nicer wheel, but you come up with a better way to test wheels along the way. Heck, your wheel might not even be meant for transportation at all! It might be a potter’s wheel, “a machine used in the shaping (known as throwing) of clay into round ceramic ware” according to Wikipedia. You might end up building a totally different kind of wheel like a steering wheel or a flywheel. We need more people who think outside the box. Reuse vs Reinvent Of course, don’t disregard the works of others – study their work and reuse where you see fit. Don’t reinvent the wheel out of distrust or ignorance of the work of others. On the other side, if you never tried to put your knowledge to the test, how would you ever learn enough about your field to advance it? I observed you can move very quickly by running little experiments. Especially in software engineering, building small prototypes is cheap and quick. 
Solve your own problem, start small, keep it simple, iterate. So, with all of the above, here’s my advice: Reinvent for insight. Reuse for impact. " + }, + { + "title": "No Matter What", + "url": "https://endler.dev/2024/no-matter-what/", + "body": "As kids, our parents established a few simple rules that we would all follow, no matter the circumstances. One of them was that we’d always have dinner together in the evening, typically around 6pm. In almost two decades, they never broke that rule. We had dinner on 9&#x2F;11 and when mom was at the hospital. It’s not always easy. There’s a nice thing that happens when you have such a golden rule: it has ripple effects. Since we had dinner together every evening, we would always have time to talk about the day. Problems would be uncovered earlier. We would know about each other’s appointments for the next day. It provided structure throughout the rest of the day. It put things into perspective. It grounded us. Bad grade at school? Dinner at 6. Played computer games all afternoon and lost track of time? Dinner at 6. No matter how bad your day was, dinner is always waiting for you. As a kid, it sounded like one of those “stupid” rules only grown-ups would come up with. And in fact, my parents knew that it was stupid. They did it anyway. As a kid, that made their life look extremely dull and boring. I remember pitying my dad once for being such a slave to society. Yet, they persisted because without it, things would fall apart. Skipping dinner is about way more than skipping dinner. These Rules Are Simple, But Not Easy It’s a simple rule with little room for interpretation. However, it’s not easy: there are times when you have to drop something else to make dinner at 6 work. That’s when the rule counts the most! That’s what makes or breaks it. Following the rule 90% of the time is much easier than following it 100% of the time. You have to make sacrifices. You have to say no sometimes. 
That’s the price it takes to stick to the rule. Yes, such rules “sound” stupid, but there’s a deeper, almost stoic realization to it: Life is complicated and will throw obstacles in your way. But if you really want to make progress, you have to find a way. If nothing else helps, make up a stupid rule; and the harder you struggle, the more specific the rule should be. Dinner. Every day. At 6 o’clock. Only now am I discovering this for myself. In 2019, I mentioned to my friend Abu that I felt bad for not doing any sports. It’s not that I didn’t try, it’s just that nothing lasted for long. He suggested going for a run together on Tuesdays – no matter what. I thought that was ridiculous. I told him that it couldn’t possibly work. Why Tuesdays of all days!? It felt so random. In my mind, I started negotiating. But there’s no point in negotiating with irrationality. Fast forward 5 years, and I still run every Tuesday. I actually suck at running. My pace isn’t fast. The distance isn’t far, but it’s a solid effort. Time was made. It worked out. Again, it had positive rippling effects: I ran on Crete in Greece and Sardinia in Italy. Different people joined me on my runs. If Tuesday finds me elsewhere, my running shoes come along. Now, did I always manage to run on a Tuesday? No. It’s not easy! But I always gave it a solid attempt and I can remember each time I didn’t run. Since Abu and I run together a lot, we would talk about our week. If we hadn’t made up that rule, we would never have gotten to know each other on such a deep level. Some people won’t understand when you tell them that you have to do a thing “no matter what.” Instead of telling them I have to go for a run, I say I’m busy that evening. Nobody ever asks any questions. Isn’t this just a habit? With “no matter what” there can be serious consequences. If you have to take care of a loved one, you can’t skip a day. Or if you’re an Air Traffic Controller, failure is not an option. 
My stakes are not as high, but I take them very seriously. “No matter what” rules aren’t habits, at least not in the beginning. They can, however, turn into super strong habits with time. I found that the best way to implement a “NMW Rule” is to do it on the spot. When my dentist asked me if I floss every day (I didn’t), I made the decision to start right then and there and never skip a day. Another good way to get started is to take on some lightweight responsibility. For example, I recommend getting plants. Then you have to water them – no matter what. If the plant dries out, you broke the rule; simple as that. The great thing is that the watering interval is usually pretty low, so there’s time to get used to it (but getting used to it you must). If it works, you’ll enjoy the feeling of continuity. It’s like a chain of good deeds. A new habit is born. In the past, I never had any plants. Now our apartment is full of them. I love the companionship and the continuity. What’s your “NMW”? If you already have a “no matter what” rule, you have my deepest respect. If not, whether you want to write that book, run that marathon, or just save a few bucks each month, make it work – no matter what. " + }, + { + "title": "The Best Programmers I Know", + "url": "https://endler.dev/2025/best-programmers/", + "body": "I have met a lot of developers in my life. Lately, I asked myself: “What does it take to be one of the best? What do they all have in common?” In the hope that this will be an inspiration to someone out there, I wrote down the traits I observed in the most exceptional people in our craft. I wish I had that list when I was starting out. Had I followed this path, it would have saved me a lot of time. Read the Reference If there was one thing that I should have done as a young programmer, it would have been to read the reference of the thing I was using. I.e. read the Apache Webserver Documentation, the Python Standard Library, or the TOML spec. 
Don’t go to Stack Overflow, don’t ask the LLM, don’t guess, just go straight to the source. Oftentimes, it’s surprisingly accessible and well-written. Know Your Tools Really Well Great devs understand the technologies they use on a fundamental level. It’s one thing to be able to use a tool and a whole other thing to truly grok (understand) it. A mere user will fumble around, get confused easily, hold it wrong and not optimize the config. An expert goes in (after reading the reference!) and sits down to write a config for the tool of which they understand every single line and can explain it to a colleague. That leaves no room for doubt! To know a tool well, you have to know: its history: who created it? Why? To solve which problem? its present: who maintains it? Where do they work? On what? its limitations: when is the tool not a good fit? When does it break? its ecosystem: what libraries exist? Who uses it? What plugins? For example, if you are a backend engineer and you make heavy use of Kafka, I expect you to know a lot about Kafka – not just things you read on Reddit. At least that’s what I expect if you want to be one of the best engineers. Read The Error Message As in Really Read the Error Message and Try to Understand What’s Written. Turns out, if you just sit and meditate about the error message, it starts to speak to you. The best engineers can infer a ton of information from very little context. Just by reading the error message, you can fix most of the problems on your own. It also feels like a superpower if you help someone who doesn’t have that skill. Like “reading from a cup” or so. Break Down Problems Everyone gets stuck at times. The best know how to get unstuck. They simplify problems until they become digestible. That’s a hard skill to learn and requires a ton of experience. Alternatively, you just have awesome problem-solving skills, e.g., you’re clever. If not, you can train it, but there is no way around breaking down hard problems. 
There are problems in this world that are too hard to solve at once for anyone involved. If you work as a professional developer, that is the bulk of the work you get paid to do: breaking down problems. If you do it right, it will feel like cheating: you just solve simple problems until you’re done. Don’t Be Afraid To Get Your Hands Dirty The best devs I know read a lot of code and they are not afraid to touch it. They never say “that’s not for me” or “I can’t help you here.” Instead, they just start and learn. Code is just code. They can just pick up any skill that is required with time and effort. Before you know it, they become the go-to person in the team for whatever they touched. Mostly because they were the only ones who were not afraid to touch it in the first place. Always Help Others A related point. Great engineers are in high demand and are always busy, but they always try to help. That’s because they are naturally curious and their supportive mind is what made them great engineers in the first place. It’s a sheer joy to have them on your team, because they are problem solvers. Write Most awesome engineers are well-spoken and happy to share knowledge. The best have some outlet for their thoughts: blogs, talks, open source, or a combination of those. I think there is a strong correlation between writing skills and programming. All the best engineers I know have good command over at least one human language – often more. Mastering the way you write is mastering the way you think and vice versa. A person’s writing style says so much about the way they think. If it’s confusing and lacks structure, their coding style will be too. If it’s concise, educational, well-structured, and witty at times, their code will be too. Excellent programmers find joy in playing with words. Never Stop Learning Some of the best devs I know are 60+ years old. They can run circles around me. Part of the reason is that they keep learning. 
If there is a new tool they haven’t tried or a language they like, they will learn it. This way, they always stay on top of things without much effort. That is not to be taken for granted: a lot of people stop learning really quickly after they graduate from University or start in their first job. They get stuck thinking that what they got taught in school is the “right” way to do things. Everything new is bad and not worth their time. So there are 25-year-olds who are “mentally retired” and 68-year-olds who are still fresh in their mind. I hope to one day belong to the latter group. Somewhat related, the best engineers don’t follow trends, but they will always carefully evaluate the benefits of new technology. If they dismiss it, they can tell you exactly why, when the technology would be a good choice, and what the alternatives are. Status Doesn’t Matter The best devs talk to principal engineers and junior devs alike. There is no hierarchy. They try to learn from everyone, young and old. The newcomers often aren’t entrenched in office politics yet and still have a fresh mind. They don’t know why things are hard and so they propose creative solutions. Maybe the obstacles from the past are no more, which makes these people a great source of inspiration. Build a Reputation You can be a solid engineer if you do good work, but you can only be one of the best if you’re known for your good work; at least within a (larger) organization. There are many ways to build a reputation for yourself: You built and shipped a critical service for a (larger) org. You wrote a famous tool You contribute to a popular open source tool You wrote a book that is often mentioned Why do I think it is important to be known for your work? All of the above are ways to extend your radius of impact in the community. Famous developers impact way more people than non-famous developers. There’s only so much code you can write. If you want to “scale” your impact, you have to become a thought leader. 
Building a reputation is a long-term goal. It doesn’t happen overnight, nor does it have to. And it won’t happen by accident. You show up every day and do the work. Over time, the work will speak for itself. More people will trust you and your work and they will want to work with you. You will work on more prestigious projects and the circle will grow. I once heard about this idea that your latest work should overshadow everything you did before. That’s a good sign that you are on the right track. Have Patience You need patience with computers and humans. Especially with yourself. Not everything will work right away and people take time to learn. It’s not that people around you are stupid; they just have incomplete information. Without patience, it will feel like the world is against you and everyone around you is just incompetent. That’s a miserable place to be. You’re too clever for your own good. To be one of the best, you need an incredible amount of patience, focus, and dedication. You can’t afford to get distracted easily if you want to solve hard problems. You have to return to the keyboard to get over it. You have to put in the work to push a project over the finishing line. And if you can do so while not being an arrogant prick, that’s even better. That’s what separates the best from the rest. Never Blame the Computer Most developers blame the software, other people, their dog, or the weather for flaky, seemingly “random” bugs. The best devs don’t. No matter how erratic or mischievous the behavior of a computer seems, there is always a logical explanation: you just haven’t found it yet! The best keep digging until they find the reason. They might not find the reason immediately, they might never find it, but they never blame external circumstances. With this attitude, they are able to make incredible progress and learn things that others fail to. When you mistake bugs for incomprehensible magic, magic is what it will always be. 
Don’t Be Afraid to Say “I Don’t Know” In job interviews, I pushed candidates hard to at least say “I don’t know” once. The reason was not that I wanted to look superior (although some people certainly had that impression). No, I wanted to reach the boundary of their knowledge. I wanted to stand with them on the edge of what they thought they knew. Often, I myself didn’t know the answer. And to be honest, I didn’t care about the answer. What I cared about was when people bullshitted their way through the interview. The best candidates said “Huh, I don’t know, but that’s an interesting question! If I had to guess, I would say…” and then they would proceed to deduce the answer. That’s a sign that you have the potential to be a great engineer. If you are afraid to say “I don’t know”, you come from a position of hubris or defensiveness. I don’t like bullshitters on my team. Better to acknowledge that you can’t know everything. Once you accept that, you allow yourself to learn. “The important thing is that you don’t stop asking questions,” said Albert Einstein. Don’t Guess “In the Face of Ambiguity, Refuse the Temptation to Guess” That is one of my favorite rules in PEP 20 – The Zen of Python. And it’s so, so tempting to guess! I’ve been there many times and I failed with my own ambition. When you guess, two things can happen: In the best case you’re wrong and your incorrect assumptions lead to a bug. In the worst case you are right… and you’ll never stop and second guess yourself. You build up your mental model based on the wrong assumptions. This can haunt you for a long time. Again, resist the urge to guess. Ask questions, read the reference, use a debugger, be thorough. Do what it takes to get the answer. Keep It Simple Clever engineers write clever code. Exceptional engineers write simple code. That’s because most of the time, simple is enough. And simple is more maintainable than complex. 
Sometimes it does matter to get things right, but knowing the difference is what separates the best from the rest. You can achieve a whole lot by keeping it simple. Focus on the right things. Final Thoughts The above is not a checklist or a competition; and great engineering is not a race. Just don’t trick yourself into thinking that you can skip the hard work. There is no shortcut. Good luck with your journey. " + }, + { + "title": "Search Index", + "url": "https://endler.dev/tinysearch.json/", + "body": "" + }, + { + "title": "So You Want to Start a (Tech) Podcast", + "url": "https://endler.dev/2024/so-you-want-to-start-a-tech-podcast/", + "body": "For the past year, I’ve been hosting the Rust in Production, a podcast about companies who shape the future of infrastructure. This journey has taught me a lot about what it takes to create and maintain a successful podcast. Well, success is always relative; at the moment we have around 5k regular (monthly) listeners. Maybe not a ton of people, but it puts us comfortably into the top 5% of podcasts – at least by some statistics. Whether you’re considering starting your own podcast or just curious about the process, I hope my experiences can offer some valuable insights. The ‘Rust in Production’ podcast cover Do Your Research Before you dive into the world of podcasting, take some time to explore the landscape. Think about branding and positioning first. Topic When choosing your topic, make sure it’s something you can easily generate ideas for. Try to come up with at least 10 potential episode ideas before you settle on it. If you’re planning an interview-based podcast, ensure you have a large enough network to secure at least 10 guests. Competition Research your competition. Listen to similar podcasts and note down what you like and dislike about them. This will help you differentiate your podcast from others in the same niche. 
If a podcast is already covering your topic, that’s not necessarily a bad thing; it just shows there’s an audience for it. However, you need to find a unique angle or a different format to stand out. If you can’t be the first, be the best. Be the funniest, or the most in-depth, or the one with the most interesting guests. Be honest with yourself about what you can offer that others can’t. If you can’t find a unique angle, it might be better to choose a different topic. If you’re not sure, ask your friends or colleagues for their opinion. Podcast Name What’s the title of your podcast? Is it catchy and easy to remember? Does it convey what your podcast is about? Is the domain name available? Are the social media handles available? Is there a simple abbreviation you can use for hashtags or mentions? Especially the last few points are often overlooked. You want to make it as easy as possible for people to find your podcast. I’d say don’t be too clever with your podcast name. It should be easy to remember and spell. If you have to explain it, it’s probably too complicated. Also, don’t use special characters or numbers in your podcast name. It makes it harder to remember and type. Don’t pick a name that is too generic either. Be specific about your niche. So instead of “The JavaScript Podcast,” go for “Refactoring JavaScript” or “React Weekly.” Don’t forget about SEO. Consider what people might search for when looking for content like yours. My podcast is titled “Rust in Production,” which is a commonly searched term. This has helped with discoverability. Another version of that, which could work, is to think about questions that people Google for. E.g. “What is functional programming?” or “How to refactor legacy code?” and then coming up with a podcast name that answers that question. For example, “Functional Programming Explained” or “Refactoring Legacy Code.” Cover Art Your podcast’s cover is equally crucial. 
It’s the first thing people recognize about your podcast (except for the title) before they decide what to listen to, so it needs to stand out from the crowd. What I did was open my podcast app and look at the grid of covers. The grid of podcast covers in my podcast app I asked myself which ones stood out and why. I also asked a few friends and my partner to do the same. I got some great feedback that way. This visual first impression can make a big difference in attracting new listeners. Length Next, decide on your podcast’s length. Fifteen minutes is great for news content, 30 minutes work well for commutes, and one hour is suitable for deep dives. Anything longer, and listeners might hesitate to commit their time. Plan Your Content Once you’ve done your research, it’s time to plan your content strategy. Having a regular schedule is key - weekly or biweekly episodes work well for many podcasts. Start conservatively; you can always increase frequency later, but underestimating the workload can lead to burnout. I highly recommend buffering content by recording a few episodes before you start publishing. This gives you a cushion and reduces stress, especially when you’re just starting out. Consider a season-based approach. For “Rust in Production,” we do 7-8 episodes and then take a break. This allows for better planning and reduces ongoing pressure. Respect Your Guests If you’re doing an interview-based podcast, treating your guests with respect is paramount. Explain why you want them on your show in the initial email. Keep them informed about the process and be flexible with scheduling. At the start of the recording, explain how things will work and ask if they have any time constraints. Remember, your guests are likely doing this for free. Respect their time and make the experience as smooth as possible for them. Invest in Quality Audio quality can make or break a podcast. 
Invest in good equipment - get a decent microphone and headphones, and consider ways to improve your room’s acoustics. If you’re interviewing guests remotely, consider their equipment too. A pre-call to check their setup can be invaluable, or you might even consider sending them equipment if you want consistent quality across episodes. Always remind guests to stay close to the mic. It’s a small detail that can make a big difference in audio quality. On two occasions, I had guests who had their condenser mic backwards, and that sounds pretty dull. You get better at picking up on these things the more you record. It helps to know which way the mic should be facing (usually the logo on the mic and the volume knob should be facing you). Both guests were very grateful for the tip and the audio quality improved significantly. Production Tips One of the best decisions I made was not to edit the podcast myself. I’m incredibly thankful that Simon Brüggen agreed to do the editing for “Rust in Production.” It would have been an enormous amount of work on top of finding guests, recording, and hosting. It also helps that Simon is a Rust developer and understands the content. He can give tips on how to improve the content from a technical perspective. For recording, tools like Zencastr, Riverside, or Descript are excellent. They capture audio on both sides, giving you uncompressed files to work with. Auphonic is great for cleaning up audio, removing filler words, and creating transcripts. When it comes to hosting, I use Letscast. They’re not the cheapest option, but their customer service is top notch and the website is fast and not bloated. Develop Your Style As you progress, you’ll naturally develop your own podcasting style. For me, I prefer to let guests do most of the talking, only interjecting occasionally with questions or comments. The motto is “say less, ask more.” It’s a good rule of thumb for interviews. It’s not about you, it’s about the guest. Let them shine. 
In pursuit of asking better questions, I wrote an essay on how to ask better questions. I’ve also found that taking notes during recording helps me ask better follow-up questions. Don’t be afraid to encourage your guests when they make good points. A nod, a smile, or a thumbs up can go a long way in making them feel comfortable and valued. Don’t Sweat the Small Stuff While it’s tempting to obsess over metrics, try not to focus on them too much. Instead, concentrate on producing content you’d enjoy consuming yourself. Be passionate about your topic and create your podcast as if no one is listening - ironically, this often leads to the most engaging content. Starting a podcast is a lot of work, but it’s incredibly rewarding. The podcast space isn’t oversaturated yet - it reminds me of the golden age of YouTube a few years ago. Podcasting is becoming more professional now, but there’s still plenty of room for new formats and perspectives. Remember: it’s okay to start small and grow. You’ll learn and improve with each new episode. The most important thing is to enjoy the process and share your personality with the world. If you’re interested in Rust, consider listening to the Rust in Production podcast. I’d love to hear what you think! " + }, + { + "title": "Follow the Hackers", + "url": "https://endler.dev/2024/follow-the-hackers/", + "body": "Want to see tomorrow’s important technologies? Watch what hackers are passionate about today. Defining “Hacker” I’m using the term “hacker” in the spirit of the Hacker Ethic, as described by authors like Steven Levy and Pekka Himanen. 
In this context, a hacker is someone who: Follows their passion and seeks self-fulfillment through technology Creates something beneficial for the wider community Values freedom, cooperation, and voluntary work Challenges traditional work ethics with a focus on creativity and sharing These folks are a small subset of the population, but they have some traits that make them excellent predictors of the future: They care deeply about their field - programming being just one example They’re passionate about the things they believe in They’d use something even if no one else cared about it They work at the cutting edge, so they need the best tools to do their job They hold strong opinions on what works and what doesn’t, backed by solid evidence They don’t care about investors, quarterly earnings reports, or politics - they purely focus on the technology’s value Catalysts For Success And Red Flags Of course, not every hyped technology makes it big. Remember NFTs or Web3? The key difference? Real hackers were never passionate about these technologies - ordinary people were. Another red flag is when the technology’s benefits are hard to explain. If hardcore tech people can’t explain the benefits to you, that’s a bad sign. Look deeper and you’ll find different motivations at work! It’s typically people looking to profit from technology. They brand themselves as Investors, “Serial Entrepreneurs”, and “Thought Leaders”. You’ll find them on LinkedIn, updating their profiles with the latest buzzwords every few months. While a few are legitimate, most are opportunists who couldn’t explain the technology to save their lives. Profit is their only motivation. The hackers? They don’t care what you think about them. They’ve got nothing to sell you. They’re too busy building cool stuff! A question hackers care about is “who owns the platform”? Companies always have an agenda. Pour in your time and effort, and they might lock you out to profit from your work. Hackers don’t like that. 
Therefore, all the winning ideas I mentioned are open source. If a technology isn’t, that’s a major red flag when evaluating its future potential. It’s not even optional anymore - it’s pretty much mandatory. But there’s another reason why open source is a catalyst for success: Initially, open source projects start as minimally functional versions without user-friendly documentation. They might be tough to set up, but the core idea is there. If people stick with it despite the lack of hand-holding, you know it’s solving a real problem - and that’s a sign of a winning idea. You Still Need Patience You probably know I’m all in on Rust. After all, I make my living as a Rust consultant. It took Rust over a decade of development to gain any real traction in the industry. It’s been a slow but steady climb. It takes time for the public to catch up before a technology hits its stride. For core technologies like programming languages or databases, it often takes a decade or more. That’s simply how long technology needs to mature. That’s why I tell founders to stay slightly conservative when adopting new tech. The industry needs time to catch up, and big companies need specialized tools to integrate new tech into their existing systems. On the other side, investing early in promising technologies is a calculated risk because the writing is on the wall. What Can You Learn From This? Hackers are already living in the future. You can use that to your advantage. Ask 10 hackers what new things they’re really excited about, and you’ll get a good picture of what’s going to be important in a few years. Most business people don’t talk to hackers regularly. That’s a fact you can use to your advantage. If you’re selling to developers (and you probably shouldn’t), the key is to really listen to what the hackers are saying and then follow their lead. 
" + }, + { + "title": "Be Simple", + "url": "https://endler.dev/2024/be-simple/", + "body": "Last night I realized that my life is very simple. That’s not by chance, but by conscious effort. Life becomes complex all by itself if you do nothing about it. One day you’ll wake up and you have a mortgage, 10 on-demand subscriptions, 20 insurances, 1000 open browser tabs, a demanding job and a dog. And when you realize it, you wonder how you got there. I keep my life simple because I know my time is limited. Time and health are my best proxies for happiness. Simple Doesn’t Mean Boring Quite the contrary: give me enough time and I find ways to entertain myself. My friends might disagree, but I consider myself to be an introvert. I like to spend time on my own to explore and learn. There hasn’t been a boring moment in a long time. If life was more complex, that would take away my time, but time is the resource I can’t replenish, so I protect it. How? Mostly by saying NO. No streaming subscriptions. No Disney+, no Netflix. I rarely watch TV anyway. No gym memberships. Just run in the park. No Instagram or TikTok, but part of that is getting older. My shoes are 6 years old. So is my wardrobe. No meetings if possible. I’m the guy who sits at his desk for 8 hours straight, only getting up for bathroom breaks twice. No property; I’m a happy tenant. No commute; I work remotely. Great home cinema setup you have there. Thanks for inviting me over! At home, I don’t even have external speakers. You’re planning a trip to the Bahamas? Enjoy! Send me a photo. Regarding technology, that means: Even though I’m a programmer, I only have a single screen. I evaluate tools, but I keep the number of tools limited. My editor doesn’t have a debugger. No Notion or Obsidian if a text file + git is enough. I limit the number of browser tabs with an extension. Simple Doesn’t Mean Minimalistic My goal is not to have as few possessions as possible – I own a lot – but to lead a simple life. 
I’ll happily buy things if they make my life simpler. The last big life improvements were a robot vacuum cleaner (four years ago) and an automated cat litter box (two years ago). If I decide to buy something, I make sure it’s the absolute best I can afford. (My rule is actually “don’t buy crap”.) For example, I spent a ridiculous amount on the best laptop I could buy. It’s my daily driver that I spend most of my time with, so it needs to be an absolute workhorse. My work is also compute-intensive, so I saw the purchase as justified. I always pay the price in full. No lease, no monthly payments. I have to use services for work, but I prefer monthly payments over yearly subscriptions, even if they are 30% more expensive. The fact that I can cancel at any time is more important to me. I know that when I buy something, it demands my attention. Maintenance is not fun. Even though I like the idea of owning something, I probably don’t truly own it. That makes me the worst consumer possible. I keep things in my Amazon basket forever. From time to time I look at the items, and when I enjoy seeing them in my basket… I keep them there. The rest, I just delete. This way, I get the “feeling” of owning things without spending any money. Simple Doesn’t Mean Convenient To live with such a person is not easy. We have regular discussions about “investing” money into things that I’m skeptical about. It takes me ages to reach a conclusion. Vacation planning is definitely one of my weaknesses. I am very well aware that my approach is not perfect at times. I am okay with making these sacrifices for protecting my time and therefore my happiness. Recently, I got a few emails from people telling me that my newsletter subscription is broken. I’m aware of that. My newsletter provider shut down. I won’t fix it. It turns out that people find other ways to follow me; either on Mastodon or via RSS. I also don’t have a comment section – Reddit or HN work just fine. 
Even though there are a few folks on my old newsletter list, I never got around to sending many emails. I don’t particularly enjoy writing a newsletter, so it might be best that it finally broke. I will probably remove the signup box. It would be nice to have it “just work”, but the next best thing is to not have it at all. Simple Means Letting Go Perhaps another way to explain it is the midwit meme: I try to stay on the left side of this curve as much as I can. I’m aware that there are “smarter” ways to do things, but I don’t want to dedicate time to learning about them. I only dedicate time to things that matter to me and that I want to go really deep into. The meme shows simple approaches on both ends, with a complicated phase in between. Getting to the right side of the spectrum takes lots of effort, and I’ve only made that journey a few times in my life. For the rest, knowing there’s an awkward complicated phase in between keeps me happily on the simple side. It’s fine. Simple Means Focused Greatness comes from dedicating time to the things that matter. The most productive people I know are focused. Yes, there’s a creative process and they allow themselves to be creative, but they do so in a very constrained environment: their office. While others chase trends, they do the thing they’re always doing. They put in the hours. It’s way easier to be focused when life is simple. When there’s no room for distractions and complexity. I find that constraints help as well. Technology is one major source of distraction. Some of the best stories were written with a typewriter. In itself, it’s a very limited environment, but it takes away all the distractions and lets you focus on the task at hand. I find that inspiring, liberating. That’s why I like constraints. When I give presentations, I wonder what I’d write if I could only have 5 slides with 5 words each, or I could only use two colors, or only show images. It keeps me focused on the message. It’s simple. 
Simple is beautiful. Simple makes me happy. " + }, + { + "title": "What to Write", + "url": "https://endler.dev/2024/what-to-write/", + "body": "People sometimes ask me how I come up with things to write. To me, it’s the same as asking how I come up with things to say. There’s always something to say. It might not be novel or interesting to most, but it’s important to me and hopefully to someone else. What people actually want to know is how to come up with something interesting to write. But why should that matter? What if people don’t find it interesting? Was it a waste of time? Why Write? There’s this funny thing which happens when you write for a while: you forget what excited you about writing in the first place. Instead, you find yourself chasing trends, trying to get more views, and build a following. Even if you’re aware that this is happening, it’s hard to stop. Your inner monologue tells you that what you’re writing isn’t good enough or that your readers won’t like it. Writing becomes a chore. Eventually, you stop writing. Somewhat tautologically, people come here exactly for one reason: to read what I write. If I make it about them, I have to guess what they want to hear, which kills the joy in writing, and also, in reading, as the content becomes predictable. Interesting Doesn’t Mean Novel. Just because someone else wrote about the same topic doesn’t mean it’s off-limits. There are a million love songs to disprove that. As it turns out, while they all revolve around the same topic, they’re all unique. They are personal, which is what makes them different. Some of these songs I like because I can relate to them. To me, that’s what makes them interesting: it’s the same story but told in a different way – a personal way. And that personal makes it new and that new makes it interesting. If You Take Away the Personal, You Take Away the Interesting Writing is a lot like that. I get to learn about how other people feel and how they think. 
It’s mostly an experience; shaped into words. It’s beautiful to think how writing is such a simple way to learn from the experiences of others. And how, with just a few words, you can emotionally connect with a stranger. It’s a very human experience. Often, what you leave out is more important than what you keep; the reader fills in the blanks. Eventually, a story starts a life of its own; when it gets shared; when it gets retold. It’s no longer the author’s story but the reader’s. It becomes part of lore. Who wrote it isn’t that important. I can’t tell who reads this and why should I care? Instead of trying to make other people enjoy my writing, I want to connect with people who like the same topics. Big difference. Knowing That Is Liberating It gives me confidence that I will never run out of things to write. At least not as long as I remember why I write. It’s liberating because I don’t have to chase the new. Instead, whatever it turns out to be is enough. At times, I’m as clueless as the reader to see where this leads me. Maybe someone else will find joy in it, maybe not. It doesn’t matter. What matters is what you think matters, and that’s what you should write about. " + }, + { + "title": "Move Slow and Fix Things", + "url": "https://endler.dev/2024/move-slow-and-fix-things/", + "body": "Growing up as a kid in rural Bavaria, I always dreamed of moving to the US to run a startup. Many kids in my generation shared that dream. To me, it felt like the only way to combine my two greatest passions: writing code and building things. As I got older, I became disillusioned with the narrative surrounding Silicon Valley. The hockey stick growth, the VC money, the “get rich quick” mentality – it was all one big illusion. For a long time, I couldn’t put my finger on what exactly bothered me about it. Part of what made me increasingly uncomfortable was the glorification of hustle culture – the idea that you have to work yourself to the bone to make it big against all odds. 
The other part was the “winner takes all” mentality and the mindset that you have to “move fast and break things” to succeed. I don’t believe that has to be the case. As it turns out, I’ve always been drawn to the exact opposite: sustainable growth, robust solutions, and a long-term mindset. That’s why I’ve been contributing to open source for 15 years, why I only run small, bootstrapped businesses or non-profits, and why I focus on writing and knowledge sharing. Paul Graham and his VC buddies would have you believe that your ultimate goal as a founder should be to build a unicorn. But when I look at the Ubers, Facebooks, and Googles of this world, I see greed, gatekeeping, systemic exploitation, user tracking, excessive resource consumption, and lawsuits against competitors. These companies will do anything in their power to stay on top – even if it means bending the law or finding legal loopholes. What kind of role model is that? The Other Side Who’s on the “other side”? It’s the humble minority building small but meaningful things. These people advocate for privacy, develop civic tech, try to live within their means, move deliberately, and fix what’s broken. They fly under the radar because their success isn’t measured in dollars, and they lack big marketing budgets. Instead, they focus on their product, doing a lot with very little. I find that far more inspiring. It’s incredibly rewarding to build something people love that can provide you with a comfortable living. There’s no need to risk it all, drop out of college, work insane hours, and leave a trail of destruction in your wake. You can build something small and useful instead, without a venture capitalist breathing down your neck. It’s still hard work, but you’re leaving the campsite a little better than you found it. Note that moving slowly doesn’t mean you can’t make quick decisions. It’s just that the execution should be deliberate. Don’t wreak havoc along the way. 
Because the time to fix what you might break rarely comes. VCs are Not Your Friends In “Why to Not Not Start a Startup”, Paul Graham writes: So, paradoxically, if you’re too inexperienced to start a startup, what you should do is start one. That’s a way more efficient cure for inexperience than a normal job. In fact, getting a normal job may actually make you less able to start a startup, by turning you into a tame animal who thinks he needs an office to work in and a product manager to tell him what software to write. Let me be blunt: that’s nonsense. It paints a black and white picture of the world that simply doesn’t exist. You don’t have to choose between starting a startup and working a soul-crushing job as a “tame animal” in a cubicle. There’s a whole spectrum of possibilities in between! For instance, you could work for yourself or with a small team, making use of your creativity and coming up with your own ideas. Paul wants you to start a startup because he wants to fund you and profit from your hard work. His motives are purely egoistical. If you happen to hit the startup jackpot, Paul gets even richer and you might become wealthy too. If you don’t, you’re left with nothing while Paul, already rich, gets to write an essay about your failure. That’s a whole lot of risk for very little upside. You might wonder why I’m picking on Paul Graham so much. It’s because I once looked up to him and valued his essays. He represents a worldview I used to believe in, but now consider harmful. Most of his essays seem true on the surface, but dig deeper and you’ll find his claims are based on a narrow worldview and rarely supported by evidence. Misleading young, impressionable people is dangerous. Startup founders bear all the downside risk, while venture capitalists are well-insulated from failure. VCs spread their bets across numerous startups, ensuring they profit regardless of individual outcomes. 
For you, the founder, it’s an all-or-nothing gamble with your time, energy, and often your financial stability. Is All VC Money Bad? Of course not. But I’d argue it’s becoming less and less relevant in today’s world. Take building a software product, for example. You don’t need a fortune to get started anymore. There are website builders, cloud hosting solutions, and open source software at your fingertips. Why take on VC money when you’re just starting out? Some might argue that you profit from valuable networking opportunities and business advice along with the funding. But most of that information is freely available online these days. There’s an abundance of podcasts, videos, and books on the subject if you’re willing to learn. It’s trickier, of course, if you’re building a physical product. But even that has become much easier in recent years. Could you sell a 3D printed prototype before scaling up production? Or launch a Kickstarter campaign to fund your first batch of products? There are now print-on-demand services for t-shirts, mugs, posters, and books. Plus, there are plenty of brick-and-mortar stores you could approach with your business idea if you’re looking to collaborate. Let’s say you do become wildly successful and urgently need capital to scale. Maybe then VC money is one way to go. Or you could take out a loan. Either way, remember that money always comes with strings attached. Funding might force you to do things you’re not comfortable with, like compromising your users’ privacy or your own values. And even if not, you’ll constantly be pressured to find new avenues for growth. Wouldn’t it be nicer to focus on making your product better instead? That’s often far more rewarding. But often, if you spend enough time thinking through a problem, you might find a way to prove your concept at a smaller scale. There Is No Infinite Growth Ask yourself: What truly motivates you? Is it power, money, or fame? If so, there are more meaningful things in life. 
Your self-worth isn’t tied to building a unicorn. I don’t know who needs to hear this, but it’s perfectly fine to be 23 and not be a millionaire founder. If you’re a high school or college student dreaming of running a startup, know that there’s another path. Don’t sell out your moral compass for a quick buck. You don’t have to jump on the AI bandwagon just because it’s the flavor of the month. At least 144 out of 251 companies from the YC W24 batch are building products with “AI”. That’s 57% of the batch. How many of them are actually doing something meaningful with it? How many will still be around in 5 years? What lasting value do these companies bring to the world? We’re accumulating a massive amount of tech debt while rewarding short-term thinking and profit over sustainability. Sure, Paul will benefit if one of them hits it big. You can read all about it in his next essay. But maybe there’s another way. Maybe it’s okay to have a small business with a loyal user base that pays the bills. Maybe you don’t need to grow exponentially. Maybe you can move slow and fix things instead. " + }, + { + "title": "Asking Better Questions", + "url": "https://endler.dev/2024/asking-better-questions/", + "body": "Recently, I realized that I mostly get paid to ask questions. As a consultant, advising companies As a podcast host In calls with potential clients The curious thing is that, like most people in a similar position, I never had any formal training in asking questions! I basically just wing it and try to get better over time. That got me thinking: What makes a good question? The other day, I reflected on that. Here’s what I came up with. 1. Good Questions Are Open-ended Recently, we did a survey about our podcast, and someone mentioned that some questions I asked the guests were “either-or” type of questions. Q: “Do you prefer dogs or cats?” A: “Dogs.” Ouch, not a very interesting conversation! A better one might have been “What’s your favorite pet?”. 
It allows for surprising answers. Q: “What’s your favorite pet?” A: “Tarantula!” We have to ask ourselves what we want out of the answer, which leads me to my second observation: 2. Never Ask A Question If You Don’t Care About The Answer Or to rephrase it, “Only ask things you care about.” If you don’t care about the other person’s answer, why even ask? Ask something else you care about instead! For example, when you ask someone if they prefer dogs or cats, what are you really asking? Do you wonder if the person is nice? Do you care about the logistics of owning a pet as a full-time employee? Or do you rather want to hear a certain answer from the other person that happens to fit your narrative? (The obvious correct answer is “cats”.) Be honest! Dig deeper into “why” you’re asking the question. If it’s one of the former two questions, get straight to the point: Q: “Are you a nice person?” A: … Q: “How do you manage having a pet if you have to work all day?” A: … It will lead to better answers. If it’s about your own agenda and you’re just looking for someone to give you the answer you’re hoping for (confirmation bias): don’t. Again, allow them to surprise you! Who knows? You might learn a thing or two about your preconceptions. Which leads to… 3. Good Questions Reveal Something About The Person Who Answers; Bad Questions About The Person Who Asks It’s very easy to slip into a role where you’re framing people, and that lets your bias speak more about you than the person you’re talking to. Be conscious about that so that you can avoid it when it happens. Q: “Why do you love X?” A: “I don’t.” Better: Q: “Tell me something you truly believe about X” A: “I believe that…” The more unique the answer, the more you learn about the person. So I would even say that a good question is one that reveals something unique about the person who answers. What is it that only they can say? What is it that they have unique insight into? 
Q: “As an expert in X, who has been in the field for 20 years, what is one thing that people always get wrong about X?” A: “People always think that X is about Y, but it’s really about Z.” 4. Good Questions Are Stacked On Top Of Each Other Do you know the Five Whys technique? It’s simple: Ask “why” five times to get to the root of the issue. Q: Are you happy with your job? A: No. Q: Why? A: It drains my energy. Q: Why? A: I have to do boring things. Q: Why? A: My boss thinks they must be done by someone. Q: Why? A: No one got around to automating them. Q: Why? A: We don’t have the skills to automate boring tasks. Aha! If they learn how to automate things, this might lead to better job happiness! That’s insightful. What’s nice is that it was a purely mechanical process. With every step, we dug deeper into the underlying question. At some point, the truth revealed itself. I love this technique. Good questions are built on top of each other. The questions themselves don’t have to be complicated. It can actually be the same question asked a few times in a row – even a child could do that. In fact, they do. A lot! This is how they learn about the world around them. As grown-ups, we should not unlearn this technique. “It’s rude to ask that.” “Don’t pry.” “Don’t be nosy.” We should relearn it! Ask follow-up questions to get to the root of things. But also! Hold the other person accountable. Q: “How can we fix poverty?” A: “I will do everything in my power to fix it.” Q: “But how?” A: “I will try countermeasures which were discussed with…” Q: “Can you give me a concrete example?” A: “…” When someone answers your question, ask yourself if the answer really covered everything you wanted to know. Often, the most interesting pieces are omitted. Sometimes on purpose. But this is the most revealing part, the part at the verge of uncertainty and insecurity and you have to uncover it to get to the heart of the matter! If you don’t do this, conversations stay shallow. 
Speaking of which… 5. Good Questions Run Deep “Why is the sky blue?” “How do people fall in love?” “Are you happy?” These are simple questions! But they touch on the very foundation of what we know, our perception of the world, and ourselves. The simpler the question, the deeper the answer. Answering with “I don’t know” is totally fine. The important part is to stay curious and to be genuinely interested in the answer. 6. Let The Winners Run And Cut Your Losses Short Sometimes, no matter what you try, there’s just nothing in a conversation. You might have ended up in that weird space where people are simply out of their depth and you go around in circles. Cut the cord. Just acknowledge it and move on. Trained conversationalists do it all the time without anyone noticing. The conversation gets boring, so they just move on to the next topic. There doesn’t even have to be a transition. Q: “So, what do you do for a living?” A: “I’m a plumber.” Q: “Oh, interesting. So, what are your plans if you win?” A: … Once you notice that, you will see it everywhere. We do it all the time in our daily lives, too, for example on the phone. Just cut your losses, move on to the next topic. Similarly, if you notice that you both are really into a topic, just run with it. 7. Give People Space To Think Pauses are powerful. Ask your question and then… wait. Just wait. Don’t fill the silence with your own thoughts or insecurities. Let the question speak for itself. If you’re truly interested in a deeper thought, you need to give people time to unpack it - for you and for themselves. If you’re not willing to wait, you’ll miss out on the best part of the conversation. We are trained to give quick answers. If someone asks you “How’s it going?”, the expected answer is “Good, you?”. People are surprised when I take a moment to answer and give them a truly honest answer, which might be deeply personal. Similarly, let people give you their quick answer first. Then wait. 
Often, they will stop and follow up with a much more personal answer. Use pauses to your advantage. 8. Obvious Questions Can Be The Best Questions If you look closely enough, which question to ask becomes obvious. “Why are we still working on this?” “Should we split up?” “How do you feel after the diagnosis?” You’re addressing the elephant in the room! But just because you have an obvious question, that doesn’t make it easy to ask! You might know that the question could hurt. Or that the answer is uncomfortable to handle. And yet, it’s still the right question to ask. What I noticed is that oftentimes multiple people have the same “obvious” question in mind. They are just too afraid to ask. Most people dance around the topic because they want to be polite. They don’t address problems head-on because it’s easier! The result is small talk. Especially if you get paid to ask questions, your job is to ask questions that no one else wants to ask. Often, politics, infighting, and hidden agendas make it very hard for people to break out of their role and ask the obvious question. If you’re not afraid to ask the obvious question, you will look like Houdini. Suddenly, an avalanche of follow-up questions gets unleashed. An honest, constructive conversation emerges. The thing is, just because you don’t ask the uncomfortable question, it doesn’t make the problem go away. In fact, you might make it worse in the long run. It’s easier to get it out of the way and move on! Q: Why are we still working on this? A: Actually, I have no idea. Let’s find something else. Q: Should we split up? A: Yes, I think so. Let’s talk about it. Q: How do you feel after the diagnosis? A: I’m scared. I don’t know what to do. But I’m glad you asked. The truth is hidden in plain sight. If no one dares to ask, these questions stay unanswered. The trick is to accept the answer for what it is. Don’t be mad or angry at people who honestly answer your question. Be mad or angry at yourself for not asking sooner. 
If You Don’t Understand The Answer, Ask Again In the past, I would often gloss over an answer and pretend I understood it. Turns out that it was a bad strategy: In the best case, I would have missed out on a great opportunity to learn something new. In the worst case, I wouldn’t have a good question to follow up with. Funnily, I’m rarely alone with this problem. There’s usually more than one confused person in the room. So don’t be afraid to refine your question to clarify any misunderstandings. Q: “Can you explain that differently?” A: … Q: “Am I correct in understanding that you mean…?” A: … Q: “Can you give me an example?” A: … There’s an old Chinese proverb that goes like this: He who asks a question is a fool for five minutes; he who does not ask a question remains a fool forever. Good Observations Yield Good Questions I noticed that many people who ask great questions have exceptional observation skills. They notice things that others don’t. Q: “Why are you still working here?” A: “I need the money.” Q: “Tough luck. What’s for lunch?” A: … vs. Q: “Why are you still working here?” A: “I need the money.” Q: “I noticed that you have this book on your desk. What’s that about?” A: “Oh, that’s my passion! I’m reading up on sales because I want to start my own business.” Q: “Nice! What is it about sales that you find so interesting?” A: … See how this tiny observation about a book on the desk led to a much deeper conversation? Suddenly, you’re talking about someone’s passion and dreams. You might inspire them to have more conversations with customers during work time to get some practice. If you want to improve your observation skills, come prepared. Learn more about the person you’re talking to. What are they passionate about? Who’s their role model? What are they doing in their free time? What are they reading? Which projects are they working on? If you come prepared, you’ll have an easier time asking good questions. Context makes good questions obvious. 
The Best Follow-up To An Answer Is A Question Okay, you got an answer. Now what? Many people follow up with a statement about themselves. “Oh, I also like that!” “Yes, I also did that!” “Let me tell you about my experience!” “You should do this!” This shows that you’re not interested in the other person, only in yourself. Instead, what if you could only follow up with a question? Q: “What’s your favorite sport?” A: “Table tennis.” Q: “How did you get into that?” A: “My dad played it when I was young.” Q: “What’s your favorite memory of playing table tennis with your dad?” A: “We used to play in the basement. It was so much fun!” Q: “What made it so much fun?” A: “I don’t know. It was just the two of us. It was our thing.” Q: “What did you learn from your dad about table tennis?” A: “He taught me how to serve. He was really good at it.” Q: “What’s the most important thing about serving in table tennis?” A: “You have to hit the ball at the right angle. Otherwise, it’s easy to return.” Q: “What’s the most difficult angle to return?” A: “The one that goes straight to the corner. It’s hard to reach.” See how this conversation unfolded? It’s like peeling an onion. You get deeper and deeper into the topic. You learn more about the person. You learn more about the topic. Step by step, you tap into someone else’s hard-earned wisdom. Look at all the things you learned in such a short time! Their favorite sport, their childhood memories, their relationship with their dad, how to serve in table tennis, the most difficult angle to return a serve. If you had followed up with “I like that too,” you would have missed out on all of this and probably never learned about it. All just because we kept asking questions instead of making statements. Think about all the people you know. Who do you like to talk to? I bet it’s the people who ask you questions and listen to your answers instead of talking about themselves all the time. Interestingly, it’s entirely selfish to ask questions. 
You learn more about the other person than they learn about you. You’re in control of the conversation. Summary I’m still not good at asking questions. I wrote this mostly as practice for myself. Maybe it helps someone else too. Here’s a summary of what I wrote: Good Questions Are Open-ended Never Ask A Question If You Don’t Care About The Answer Good Questions Reveal Something About The Person Who Answers; Bad Questions About The Person Who Asks Good Questions Are Stacked On Top Of Each Other Good Questions Run Deep Let The Winners Run And Cut Your Losses Short Give People Space To Think Obvious Questions Can Be The Best Questions If You Don’t Understand The Answer, Ask Again Good Observations Yield Good Questions The Best Follow-up To An Answer Is A Question Don’t be mistaken! Asking good questions is hard work! You have to be present, you have to listen, you have to reflect. It helps to take mental notes while you’re talking to someone. After a while, you will get better at spotting the patterns. Asking good questions is a skill that can be learned and improved upon. Asking more questions can’t hurt along the way. So, how do you ask better questions? " + }, + { + "title": "The Dying Web", + "url": "https://endler.dev/2024/the-dying-web/", + "body": "I look left and right, and I’m the only one who still uses Firefox. At conferences and in coworking spaces, it’s always the same scene: people using some flavor of Chrome. Sometimes it’s Brave, sometimes Chromium, most of the time it’s just Google Chrome. I find that hilariously appalling. An entire generation grew up with access to great free tools and open standards, which helped them jumpstart their careers and gave them access to excellent technology for free. Now, the world’s largest websites are owned by the same company, which also owns the world’s most popular browser and search engine. Coincidentally, they are also the world’s largest advertising company. 
And people are wondering why they can’t block ads on YouTube anymore. We gave it all away for nothing. Let me be the first to admit that I too am not without sin. There was a weak moment about 15 years ago when browser performance became so unbearable on anything other than Chrome that it forced my hand to make the switch. And yes, for a while, life was good and websites loaded quickly. Reluctantly, I made the switch back to Firefox after a while, because open standards and privacy were more important than a few milliseconds of loading time. I could still understand why people would use Chrome, but I was happy with my choice. Then Firefox Quantum came around, and I told all my fellow developer friends about it. To me, it was the best browser on the market, and I was proud to be a Firefox user again. It was fast, snappy, and had a great add-on ecosystem. To my surprise, nobody cared. Bad Habits Die Hard Maybe people stayed with Chrome out of habit. Performance and privacy aside, I just don’t know how people can cope with Chrome’s limited customizability. It’s hilarious to watch people with 200 tabs named “G”, “Y”, or “X” struggle to find that one document they opened a week ago. In comparison, vertical tabs on Firefox with add-ons like Sidebery make Chrome look like a toy. Anyhow, Chrome. There was a time when I tried to educate people on the negative effects of browser monoculture. Okay, my mum didn’t get it, but I was more disappointed by my fellow devs. Everyone took the easy route and happily stayed on Uncle Google’s lap. At this point, I neither have the willpower nor the energy to fight back; it’s hopeless. It’s probably easier to get blood from a stone than to convince someone to switch back to Firefox. It’s so easy to switch, you won’t even lose any open tabs! Nobody Forces You to Use Chrome True, but the issues don’t stop at my front door. As an outsider, I need to live with the consequences of browser monoculture every day. 
Quite a few websites are unusable by now because they got “optimized for Chrome.” Microsoft Teams, for example, and the list is long. These websites fail for no good reason. There are positive examples, too. Zencastr, for example, used to be broken on Firefox, but they fixed it. Update: Zencastr is still broken on Firefox. Thanks to Randell from Mozilla for pointing that out. Their support page states that Chrome, Edge, or Brave are required. They stopped supporting Firefox in February 2021 as per this blog post. There’s an open ticket in the Mozilla Bugzilla tracking this issue. It’s currently blocked on some other issues, but there’s progress being made. The WebCodecs API, which might be related to this problem, is currently in beta and progressing through Mozilla’s release process. While it’s disappointing that Zencastr doesn’t work on Firefox yet, it’s encouraging to see that Mozilla is actively working on resolving the underlying issues. I also use Chrome for online calls, because tools like Jitsi don’t work well on Firefox. Maybe it’s because of Firefox’s WebRTC support? Or, maybe it’s because of Chrome: Pop Quiz: If a website wants to play out of different speakers on your system, what permission must it have? Speaker-selection permission Microphone permission If you answered 2, then chances are you know your WebRTC stuff well, but you’re probably on a Chromium browser. How could Google get free rein? Because everyone and their car stopped testing their stuff anywhere else. If everyone tweaks their site for Chrome, well, of course the site will work just fine on Chrome! We find ways around Chrome’s weird quirks. More users join the bandwagon because stuff “just works” and the vicious cycle continues. I can’t blame them. It’s easier to ride a horse in the direction it is going. But at what cost? *Elrond voice*: We’ve been down this road before. (Okay, I was there.) We called it the Browser Wars: Netscape vs Internet Explorer. 
Netscape lost and Microsoft ruled over the web with an iron fist. It wasn’t fun. We had more hacks around browser limitations than actual website functionality. Parents put their kids through college by working around browser bugs for money. Microsoft tried really hard to make life as miserable as possible for everybody: Internet Explorer has introduced an array of proprietary extensions to many of the standards, including HTML, CSS, and the DOM. This has resulted in several web pages that appear broken in standards-compliant web browsers and has introduced the need for a “quirks mode” to allow for rendering improper elements meant for Internet Explorer in these other browsers. — Wikipedia Essentially, they broke the web and we all warmed our hands on the dumpster fire. All we got in return was quirks mode. Google is smarter! They break the web, too, but they make you stand inside the fire. Why should I care about a browser? They are all the same anyways. …says the developer who gets tracked by Google every waking moment. Source: https:&#x2F;&#x2F;www.skeletonclaw.com&#x2F;image&#x2F;710734055173472257 You see, Chrome is reeeeally good at marketing. They say all the right things: We’re fast! We’re open source! We have the latest features! What they don’t tell you is that they control the narrative of the World Wide Web. They make you feel guilty for using adblockers and add weird nonstandard browser features because they can. Lately, the uBlock Origin team just threw in the towel and stopped supporting Chrome. But did anyone decide to jump ship? I get the feeling that by now people turn a blind eye to Google’s evil practices. But shouldn’t Brave, Edge, Opera, or Vivaldi be sufficient? Unfortunately not. They all use the same browser engine under the hood. Browser makers make mistakes, so this engine is not perfect. If it contains a bug and there’s no competition, that bug becomes the standard. 
Alternative browser engines need to implement the bug as well to support websites which depend on it. I Use Safari Congratulations, you switched from a browser controlled by a 2 trillion dollar company to a browser controlled by a 3 trillion dollar company. Oh, and it doesn’t run on Windows or Linux. Both Apple and Google would throw you under the bus if it made them more profit. Oh, and did you know that Google paid Apple 20 billion dollars in 2022 to be the default search engine on Safari? What Can I Do? If you’ve made it this far, do yourself a favor and spend a few minutes trying Firefox. Who knows? You might just like it. Try Firefox today, please? " + }, + { + "title": "How To Sell To Developers", + "url": "https://endler.dev/2024/selling-to-developers/", + "body": "One of the hardest challenges I know is how to sell to developers. This is NOT an article for developers. Today, I want to write for non-developers whose job it is to sell to developers. My goal is to help you understand how they think. Developers Hate Being Sold To We tend to be a skeptical bunch! We’re not reachable through Google ads because we use ad blockers. We expect software to be free and open source, so we don’t usually pay for it. We subscribe to a handful of services that provide a lot of value for very little cost. Everything else we build ourselves. See the problem? How do you sell to these people? The Developer’s Dilemma Strangely enough, when developers think about starting a business, the first products they consider are often those aimed at other developers, despite there being many easier markets to sell to! We love building things so much that we try to sell the things we build to people just like us. Here’s my advice: Don’t build a product for developers. Seriously, don’t build a product for developers. Since you’ll likely ignore the first two pieces of advice, at least learn how to market to developers effectively. 
What You Need to Know About Marketing to Developers Here’s the bitter truth. To sell to developers… You will have to be present in the places where they hang out. You will have to invest a lot of time in community building. You will have to be extremely patient. There is no shortcut. What if you don’t have the time or the patience? See rule 1 above. My Developer Product Journey So Far You might ask, “Why should I listen to you? Have you ever sold a product to developers?” I’ll let you be the judge of that. Here are a few products that I have built and sold to developers: codeprints Homepage of CodePrints with my GitHub profile as a print This was a fun experiment during the pandemic. We sold posters of GitHub timelines. Our marketing strategy included posts on Reddit and Hacker News, and we sent free prints to a few “devfluencers.” The novelty effect and viral marketing worked well. Eventually, we sold the company to a developer agency because we didn’t want to deal with the logistics. Read more about CodePrints in this blog post. Lychee Homepage of Lychee’s documentation page I built this tool for myself because I needed it. Lychee is a command-line tool that checks for broken links in markdown and HTML files. It has become quite popular on GitHub, with companies like Google and Amazon using it. However, I’ve never made any money from it. I reached out to some companies to sponsor the project, but it was very hard to get a response or explain why they should sponsor a free tool. I wrote about making money with Open Source here. Analysis Tools Homepage of analysis-tools.dev This is a directory of tools for developers. It’s a side project that I started in 2015 (did I mention you need to be patient?), and it has grown to be a popular resource. Together with two friends, we sell sponsorships and ads on the site, which provides a nice side income. 
I think this project works because it’s not a product for developers but a product for people who build products for developers. Our customers are mostly developer advocates, developer relations folks, and marketing people who want to reach developers. Sponsoring on Analysis Tools is extremely cheap compared to other marketing channels like Google ads. On Google, you can easily spend thousands of dollars per month trying to reach developers. On Analysis Tools, you can reach them for a fraction of that cost. The basic tier is $100 per month, and for that, you get your logo on the site and in every repo. Thousands of developers visit the site every month. It’s basically a no-brainer to sponsor the site if you build a linter, a static code analyzer, or any other tool that developers use. Tools page of analysis-tools.dev Think of it this way: when was the last time these companies had the chance to reach thousands of developers for $100? What is the total cost of acquisition for a developer? How much would you pay for a developer to try out your tool? And still… Only 20% Of Companies Immediately Get It From the companies I’ve talked to, only around 20% get it immediately. There are perhaps 30% that need a little handholding. The other 50% see it as an ad placement and want to know the click-through rate. Let me repeat: It’s not about the click-through rate. It’s about the branding and the reach. You will never reach a critical audience if you don’t start investing in the developer community. This is not a one-time thing. You need a lot of exposure until you get noticed and developers start talking about your product. If you try to fake it, you won’t get far. Devs will avoid your product like the plague and tell their friends to do the same. Your main constraint is the attention of developers! These folks constantly get bombarded with millions of products and don’t have time to evaluate them all. They stopped actively looking for new tools a long time ago. 
They rely on a handful of trusted sources they follow. Building Trust Is The Only Way Getting into their circle of trust is hard, and for that reason, you need to be patient and invest a lot of time into relationship building. If you can afford it, hire a developer advocate. A good one is heavily invested in open source and knows how to write for developers. At the very least, you need to be present in the places where developers hang out. So reach out to open source maintainers in your niche and see if you can collaborate or sponsor their projects. Don’t waste cash on Google ads or other traditional marketing channels. Go where the developers are. I hope this article helped someone who is struggling to sell to developers. Perhaps one of you will reach out to some open source maintainers in their niche and sponsor their projects. That would be a great outcome! There’s a plus side to all of this: once you understand that you need to invest in the developer community, you will have a competitive advantage because most other companies don’t get it. " + }, + { + "title": "Cursed Rust: Printing Things The Wrong Way", + "url": "https://endler.dev/2023/cursed-rust/", + "body": " document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); There is a famous story about a physicist during an exam at the University of Copenhagen. The candidate was asked to describe how to determine a skyscraper’s height using a barometer. The student suggested dangling the barometer from the building’s roof using a string and then measuring the length of the string plus the barometer’s height. Although technically correct, the examiners were not amused. 
After a complaint and a reevaluation, the student offered various physics-based solutions, ranging from dropping the barometer and calculating the building’s height using the time of fall, to using the proportion between the lengths of the building’s shadow and that of the barometer to calculate the building’s height from the height of the barometer. He even humorously suggested simply asking the caretaker in exchange for the barometer. The physicist, as the legend goes, was Niels Bohr, who went on to receive a Nobel Prize in 1922. This story is also known as the barometer question. Why Is This Story Interesting? The question and its possible answers have an important didactic side effect: they convey to the learner that one can also get to the solution with unconventional methods — and that these methods are often more interesting than the canonical solution because they reveal something about the problem itself. There is virtue in learning from unconventional answers to conventional questions. To some extent, this fosters new ways of thinking and problem-solving, which is an essential part of innovation. Applying The Same Principle To Learning Rust One of the first examples in any book on learning Rust is the “Hello, world!” program. fn main() println!(&amp;quot;Hello, world!&amp;quot;); It’s an easy way to test that your Rust installation is working correctly. However, we can also have some fun and turn the task on its head: let’s find ways to print “Hello, world!” without using println!. Let’s try to come up with as many unconventional solutions as possible. The weirder, the better! As you go through each of the solutions below, try to understand why they work and what you can learn from them. This started as a meme, but I decided to turn it into a full article after the post got a lot of attention. It goes without saying that you should never use any of these solutions in production code. Check out this enterprise-ready version of hello world instead. 
Solution 1: Desugaring println! use std::io::Write; write!(std::io::stdout().lock(), &amp;quot;Hello, world!&amp;quot;); This solution is interesting, because it shows that println! is just a macro that expands to a call to write! with a newline character appended to the string. The real code is much weirder. Search for print in this file if you want to be amazed. write! itself desugars to a call to write_fmt, which is a method of the Write trait. There is a real-world use case for this: if you want to print things really fast, you can lock stdout once and then use write!. This avoids the overhead of locking stdout for each call to println!. See this article on how to write a very fast version of yes with this trick. Solution 2: Iterating Over Characters &amp;quot;Hello, world!&amp;quot;.chars().for_each(|c| print!(&amp;quot; &amp;quot;, c)); This shows that you can implement println! using Rust’s powerful iterators. Here we iterate over the characters of the string and print each one of them. chars() returns an iterator over Unicode scalar values. Learn more about iterators here. Solution 3: Impl Display struct HelloWorld; impl std::fmt::Display for HelloWorld fn fmt(&amp;amp;self, f: &amp;amp;mut std::fmt::Formatter&amp;lt;&amp;#39;_&amp;gt;) -&amp;gt; std::fmt::Result write!(f, &amp;quot;Hello, world!&amp;quot;) println!(&amp;quot; HelloWorld &amp;quot;); This teaches us a little bit about how traits work in Rust: We define a struct that implements the Display trait, which allows us to print it using print!. In general, Display is intended to make more complex types printable, but it is also possible to implement it for a hardcoded string! Solution 4: Who Needs Display? How about we create our own trait instead of using Display? 
trait Println fn println(&amp;amp;self); impl Println for &amp;amp;str fn println(&amp;amp;self) print!(&amp;quot; &amp;quot;, self); &amp;quot;Hello, world!&amp;quot;.println(); We can exploit the fact that we can name our trait methods however we want. In this example, we choose println, making it look like it is part of the standard library. This completely turns the println! macro on its head. Instead of passing a string as an argument, we call a method on the string itself! Solution 5: Who Needs println! When You Got panic!? panic!(&amp;quot;Hello, world!&amp;quot;); There are other ways to print things in Rust than using println!. In this case, we use panic!, which prints the string (as a side-effect) and immediately terminates the program. It works as long as we only want to print a single string… Solution 6: I ♥︎️ Closures (|s: &amp;amp;str| print!(&amp;quot; &amp;quot;, s))(&amp;quot;hello&amp;quot;); Rust allows you to call a closure directly after its definition. The closure is defined as an anonymous function that takes a string slice as an argument and prints it. The string slice is passed as an argument to the closure. In practice, this can be useful for defining a closure that is only used once and for which you don’t want to come up with a name. Solution 7: C Style extern crate libc; use libc:: c_char, c_int ; use core::ffi::CStr; extern &amp;quot;C&amp;quot; fn printf(fmt: *const c_char, ...) -&amp;gt; c_int; fn main() const HI: &amp;amp;CStr = match CStr::from_bytes_until_nul(b&amp;quot;hellon0&amp;quot;) Ok(x) =&amp;gt; x, Err(_) =&amp;gt; panic!(), ; unsafe printf(HI.as_ptr()); You don’t even need to use Rust’s standard library to print things! This example shows how to call the C standard library’s printf function from Rust. It’s unsafe because we are using a raw pointer to pass the string to the function. This teaches us a little bit about how FFI works in Rust. Credit goes to &#x2F;u&#x2F;pinespear on Reddit and @brk@infosec.exchange. 
Solution 8: C++ Style We’re well into psychopath territory now… so let’s not stop here. If you try extremely hard, you can bend Rust to your will and make it look like C++. use std::fmt::Display; use std::ops::Shl; #[allow(non_camel_case_types)] struct cout; #[allow(non_camel_case_types)] struct endl; impl&amp;lt;T: Display&amp;gt; Shl&amp;lt;T&amp;gt; for cout type Output = cout; fn shl(self, data: T) -&amp;gt; Self::Output print!(&amp;quot; &amp;quot;, data); cout impl Shl&amp;lt;endl&amp;gt; for cout type Output = (); fn shl(self, _: endl) -&amp;gt; Self::Output println!(&amp;quot;&amp;quot;); cout &amp;lt;&amp;lt; &amp;quot;Hello World&amp;quot; &amp;lt;&amp;lt; endl; The Shl trait is used to implement the &amp;lt;&amp;lt; operator. The cout struct implements Shl for any type that implements Display, which allows us to print any printable type. The endl struct implements Shl for cout, which prints the newline character in the end. Credit goes to Wisha Wanichwecharungruang for this solution. Solution 9: Unadulterated Control With Assembly All of these high-level abstractions stand in the way of printing things efficiently. We have to take back control of your CPU. Assembly is the way. No more wasted cycles. No hidden instructions. Pure, unadulterated performance. use std::arch::asm; const SYS_WRITE: usize = 1; const STDOUT: usize = 1; fn main() #[cfg(not(target_arch = &amp;quot;x86_64&amp;quot;))] panic!(&amp;quot;This only works on x86_64 machines!&amp;quot;); let phrase = &amp;quot;Hello, world!&amp;quot;; let bytes_written: usize; unsafe asm! 
&amp;quot;syscall&amp;quot;, inout(&amp;quot;rax&amp;quot;) SYS_WRITE =&amp;gt; bytes_written, inout(&amp;quot;rdi&amp;quot;) STDOUT =&amp;gt; _, in(&amp;quot;rsi&amp;quot;) phrase.as_ptr(), in(&amp;quot;rdx&amp;quot;) phrase.len(), &#x2F;&#x2F; syscall clobbers these out(&amp;quot;rcx&amp;quot;) _, out(&amp;quot;r11&amp;quot;) _, assert_eq!(bytes_written, phrase.len()); (Rust Playground) If you’re wondering why we use Rust in the first place if all we do is call assembly code, you’re missing the point! This is about way more than just printing things. It is about freedom! Don’t tell me how I should use my CPU. Okaaay, it only works on x86_64 machines, but that’s a small sacrifice to make for freedom. Submitted by isaacthefallenapple. Solution 10: “Blazing Fast” Why did we pay a premium for all those CPU cores if we aren’t actually using them? Wasn’t fearless concurrency one of Rust’s promises? Let’s put those cores to good use! use std::sync:: Arc, Mutex ; use std::thread; use std::time::Duration; fn main() let phrase = &amp;quot;hello world&amp;quot;; let phrase = Arc::new(Mutex::new(phrase.chars().collect::&amp;lt;Vec&amp;lt;_&amp;gt;&amp;gt;())); let mut handles = vec![]; for i in 0..phrase.lock().unwrap().len() let phrase = Arc::clone(&amp;amp;phrase); let handle = thread::spawn(move || thread::sleep(Duration::from_millis(((i + 1) * 100) as u64)); print!(&amp;quot; &amp;quot;, phrase.lock().unwrap()[i]); ); handles.push(handle); for handle in handles handle.join().unwrap(); println!(); Here, each character is printed in a separate thread. The threads are spawned in a loop, and each thread sleeps for a certain amount of milliseconds before printing its character. This uses the full power of your CPU to print a string! It might not always consistently print the characters in the right order (hey, scheduling is hard!), but that’s a worthwhile trade-off for all the raw performance gains. Your Turn! If you’ve got more solutions, please send me a message. 
Also, if you liked this article, you might also enjoy the yearly obfuscated C code contest. Check out the previous winners here. If you were actually more intrigued by the barometer story, read Surely You’re Joking, Mr. Feynman!, a book by Richard Feynman, another famous physicist and Nobel Prize winner, who was known for his unconventional way of thinking. We should all strive to think outside the box and come up with unconventional solutions to problems. Who knows, maybe that’s the key to a deeper understanding of the problem itself? " + }, + { + "title": "Deploy Rust Code Faster", + "url": "https://endler.dev/2023/move-fast-rust/", + "body": "I’ve come a long way in my tech journey, from dealing with bare metal servers to exploring the world of cloud computing. Initially, it seemed so straightforward – spin up a server, deploy a container, and you’re done. But as I delved deeper, I realized that the ease of infrastructure is not as simple as it appears. Cloud providers offer a multitude of tools, each with its own learning curve: Google Cloud &#x2F; AWS Kubernetes Helm Docker Terraform GitHub Actions If you’re adventurous, you might even venture into managed Kubernetes services like EKS or GKE. It’s tempting, with just a few clicks, your application is ready to roll. But the reality hits when you start juggling monitoring, logging, security, scaling, and more. Soon, you find yourself unintentionally leading a DevOps team instead of focusing on your product. You hire more staff to manage infrastructure while your competitors are shipping features and growing their user base. My Frustration The cloud promised to make infrastructure easy, but the array of tools and services can be overwhelming. Even if you don’t use them all, you must be aware of their existence and learn the basics. The result? Your focus on the product diminishes. I appreciate dealing with infrastructure, but I also love delivering a product. 
Sadly, many companies waste precious time and money on infrastructure, repeating the same mistakes. What if there was a way to eliminate infrastructure concerns altogether? The Allure of Serverless Serverless architecture seems promising - no servers, no containers, just pure business logic. However, it’s not without challenges: Cold start times Lambda size limitations Memory issues Long-running processes Debugging complexities Lack of local testing Serverless has its merits for certain use cases, but for larger applications, you might still need some servers. Platform-As-A-Service (PaaS) Platforms like Heroku and Netlify introduced a third option – managed services that handle all infrastructure for you. No more infrastructure concerns; you simply push code, and it deploys. What’s great about these solutions is their deep integration with specific programming language ecosystems. I was looking for a platform tailored for Rust developers, aiming to provide a top-notch developer experience. I wanted deep integration with the Rust ecosystem (serde, sqlx, axum,…). A while ago, I came across Shuttle while trying to find ways to make my Rust development workflow a bit smoother. It’s a tool that kind of just fits into the existing Rust ecosystem, letting you use cargo as you normally would, but with some of the infrastructural heavy lifting taken out of the picture. Now, it’s not a magic wand that solves all problems, but what I appreciate about Shuttle is its simplicity. You’re not thrown into a completely new environment with a steep learning curve. Instead, you stick to your Rust code, and Shuttle is there in the background, helping manage some of the server-side complexities. So, in essence, it’s about sticking to what you know, while maybe making life a tad easier when it comes to deployment and server management. 
It’s not about a revolutionary change in how you code, but more about a subtle shift in managing the background processes that can sometimes be a bit of a headache. My Shuttle Experience So Far Until now, I built two smaller Rust services with Shuttle: Zerocal and Readable. Shuttle takes your Rust code and with very few annotations, it can be deployed to the cloud. The developer experience is pretty close to ideal given that provisioning and deployment are usually the most painful parts of building a service. Instead, it’s just a matter of adding a few lines of code. See for yourself. The boilerplate just vanishes. What’s left is the business logic. Your browser does not support the video tag. :( But the video is great. Zerocal - Stateless Calendar Magic Zerocal was the first project I deployed on Shuttle. The principle was very simple yet innovative: encode calendar data directly into a URL. This means creating an event was as straightforward as: curl https:&#x2F;&#x2F;zerocal.shuttleapp.rs?start=2023-11-04+20:00&amp;amp;duration=3h&amp;amp;title=Birthday&amp;amp;description=paaarty This would return an iCal file, that you can add to your calendar. Here’s how you create an event in the browser: I tried building this project on Shuttle when they were still fixing some things and changing their APIs here and there. Even with these small issues, it was a good experience. In just a few minutes, my app was up and running. Here’s the code to start the service including the axum routes: #[shuttle_runtime::main] async fn axum() -&amp;gt; shuttle_axum::ShuttleAxum &#x2F;&#x2F; just normal axum routes let router = Router::new() .route(&amp;quot;&#x2F;&amp;quot;, get(calendar)) .route(&amp;quot;&#x2F;&amp;quot;, post(calendar)); Ok(router.into()) I don’t really need Zerocal for myself anymore, so I’m hoping someone else might want to take it over. I think it could be really useful for sharing invites on places like GitHub or Discord. 
If you want to know more about Zerocal, you can read this detailed breakdown. I would also like to mention that someone else built a similar project inspired by Zerocal: kiwi by Mahesh Sundaram, written in Deno. This is a really cool outcome. A Reader Mode For My E-Reader My appreciation for Firefox’s reader view sparked the creation of a Reader Mode Proxy for a minimalist, JavaScript-free web reading experience, particularly tailored for e-readers. The intention was to transform verbose websites into a more digestible format for distraction-free reading. This project deeply reflected my personal preferences, as I like simple apps that solve a problem. With just a sprinkle of annotations, my code adapted smoothly to Shuttle’s environment. Initially, I had my own local mode, which allowed me to run the app on my machine for testing, but I found no need to maintain that because Shuttle’s own local mode works just as well. While developing the app, there were some bumps along the road. Service downtimes required some code revamping. Yet, Shuttle’s evolution simplified parts of my process, especially when it introduced native static file handling. 
Before it looked like this: #[shuttle_runtime::main] async fn axum() -&amp;gt; shuttle_axum::ShuttleAxum let router = Router::new() &#x2F;&#x2F; Previously, I needed to manually serve static files .route( &amp;quot;&#x2F;static&#x2F;Crimson.woff2&amp;quot;, get(|| async static_content( include_bytes!(&amp;quot;..&#x2F;static&#x2F;fonts&#x2F;Crimson.woff2&amp;quot;,), HeaderValue::from_static(&amp;quot;text&#x2F;woff2&amp;quot;), ) ), ) .route( &amp;quot;&#x2F;static&#x2F;JetBrainsMono.woff2&amp;quot;, get(|| async static_content( include_bytes!(&amp;quot;..&#x2F;static&#x2F;fonts&#x2F;JetBrainsMono.woff2&amp;quot;,), HeaderValue::from_static(&amp;quot;font&#x2F;woff2&amp;quot;), ) ), ) .fallback(readable); Ok(router.into()) Now it’s just #[shuttle_runtime::main] async fn axum() -&amp;gt; shuttle_axum::ShuttleAxum let router = Router::new() .nest_service(&amp;quot;&#x2F;static&amp;quot;, ServeDir::new(PathBuf::from(&amp;quot;static&amp;quot;))) .fallback(readable); Ok(router.into()) To understand the intricacies of this project, here’s a more comprehensive look. Control and Safety Initially, I was concerned that annotating my code for infrastructure would cause vendor lock-in. I wanted to retain full control over my project. Want to move away? The Shuttle macros get rid of the boilerplate, so I could just remove the 2 annotations I’ve added and get the original code back. Shuttle’s code is also open source, so I could even set up my own self-hosted instance — although I wouldn’t want to. The True Cost of DIY Infrastructure Infrastructure may seem easy on the surface, but maintaining it involves various complexities and costs. Updates, deployments, availability – it can be overwhelming. Each hour spent on these tasks carries both a direct and opportunity cost. Infrastructure can be a maze, and Shuttle seems to fit well for those working with Rust. 
I’m thinking of trying out a larger project on Shuttle soon, now that I’ve got a decent understanding of what Shuttle can and can’t do. If you’re considering giving it a shot, it’s wise to check their pricing to ensure it aligns with your needs. Be mindful of the real cost of infrastructure! As I’ve mentioned before, it’s not just server costs, but a lot more. The biggest factor will probably be human labor for maintenance and debugging infrastructure and that is expensive. If I were to use infrastructure as code, I’d be spending many hours setting up my infrastructure and a lot more to maintain it and that can be expensive, given today’s salaries. Even if it was just for a hobby project, it would not be worth the trouble for me. I’d much rather work on features than the code that runs it all. " + }, + { + "title": "Little Helpers", + "url": "https://endler.dev/2023/helpers/", + "body": "Yesterday I couldn’t help but feel a sense of awe at all the conveniences modern life has to offer. A lot of the chores in our household are taken care of by little helpers: The dishwasher washes the dishes, the washing machine washes the clothes, and the robot vacuum cleaner cleans the floors. The refrigerator keeps our food cold, the microwave heats it up, and the oven cooks it. We take all of this for granted because the devices rarely fail, but it’s really amazing when you think about it. It’s only been a few decades since much of this was tedious, time-consuming, manual labor. I heard stories about how people used to watch the washing machine do its thing, just because it was entertaining to see the machine do their work for them. document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Growing up in the 90s and early 2000s, I remember when “smart home” was a buzzword, and now it’s a reality. Smart devices control the thermostat and soon the lights and the door locks in our apartment. 
Of course there were a bunch of stupid ideas that didn’t work out along the way. I remember when they tried to sell those “smart” fridges that would run a web browser and let you order groceries from the fridge. Who would want to do that? It’s so much easier to just order groceries online from your phone or computer. On the other hand, of all the people I talked to, I’ve never met anyone who regrets buying a vacuum robot. We recently got a cat and quickly automated all the tedious stuff. The litter box cleans itself, there’s a water fountain that keeps the water fresh, and soon we’ll get a food dispenser. That means we have more time to focus on the fun stuff, like playing with the cat. And yes, I fully realize that this convenience comes from an incredible position of privilege. A privileged position that we should never take for granted! Instead, we should be grateful for the little helpers that make our lives easier and make them more accessible to everyone. " + }, + { + "title": "A Reader Mode Proxy for the Slow Web", + "url": "https://endler.dev/2022/readable/", + "body": " Reader showing an article in light and dark mode. tl;dr: I built a service that takes any article and creates a pleasant-to-read, printable version. It is similar to Reader View in Firefox&#x2F;Safari, but also works on older browsers, can be shared and has a focus on beautiful typography. Check out the source code. The web used to be such a fun place. Nowadays? Meh. Trackers, ads, bloat, fullscreen popups, autoplaying videos… it’s all so exhausting. I just want to read long-form posts without distractions with a good cup of tea, the cat sleeping on the windowsill and some light snow falling in front of the window. The Slow Web I’m a big fan of the Slow Web movement and of little sites that do one thing well. For reading long-form text clutter-free I use Reader View in Firefox, and while it doesn’t always work and it’s not the prettiest I like it. 
There are reader modes in other browsers as well, but some of them — like Chrome — hide it behind a feature flag. Other browsers, like the one on my eBook reader, don’t come with a reader mode at all, which leaves me with a subpar and slow browsing experience on my main device used for reading. So I built a reader mode as a service with a focus on beautiful typography which works across all browsers. It’s very basic, but I use it to read articles on my older devices and it could also make content more accessible in regions with low bandwidth or while travelling. Building It Lately I saw a post about circumflex, a Hacker News terminal client. The tool did a solid job at rendering website content and I wondered if I can retrofit that into a proxy server. The Golang cleanup code is here: func GetArticle(url string, title string, width int, indentationSymbol string) (string, error) articleInRawHTML, httpErr := readability.FromURL(url, 5*time.Second) if httpErr != nil return &amp;quot;&amp;quot;, fmt.Errorf(&amp;quot;could not fetch url: %w&amp;quot;, httpErr) &#x2F;&#x2F; ... They use go-readability, a port of Mozilla’s Readability. The Rust equivalent is readability and it’s simple enough to use: use readability::extractor; fn main() -&amp;gt; Result&amp;lt;(), Box&amp;lt;dyn std::error::Error&amp;gt;&amp;gt; let response = extractor::scrape(&amp;quot;https:&#x2F;&#x2F;endler.dev&#x2F;2022&#x2F;readable&amp;quot;)?; println!(&amp;quot; &amp;quot;, response.content); Ok(()) Before we write a full proxy server, let’s write a simple CLI tool that takes a URL and outputs a clean, readable HTML file. 
use readability::extractor; use std::fs::File; use std::io::Write; fn main() -&amp;gt; Result&amp;lt;(), Box&amp;lt;dyn std::error::Error&amp;gt;&amp;gt; &#x2F;&#x2F; read the URL from the command line let url = std::env::args().nth(1).expect(&amp;quot;Please provide a URL&amp;quot;); let response = extractor::scrape(&amp;amp;url)?; let mut file = File::create(&amp;quot;index.html&amp;quot;)?; file.write_all(response.content.as_bytes())?; Ok(()) The output already looked surprisingly good. Next I added a simple HTML template to wrap the response content. &amp;lt;!DOCTYPE html&amp;gt; &amp;lt;html lang=&amp;quot;en&amp;quot;&amp;gt; &amp;lt;head&amp;gt; &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot; &#x2F;&amp;gt; &amp;lt;title&amp;gt;Document&amp;lt;&#x2F;title&amp;gt; &amp;lt;link rel=&amp;quot;stylesheet&amp;quot; href=&amp;quot;yue.css&amp;quot; &#x2F;&amp;gt; &amp;lt;style type=&amp;quot;text&#x2F;css&amp;quot;&amp;gt; body margin: 0; padding: 0.4em 1em 6em; background: #fff; .yue max-width: 650px; margin: 0 auto; &amp;lt;&#x2F;style&amp;gt; &amp;lt;&#x2F;head&amp;gt; &amp;lt;body&amp;gt; &amp;lt;div class=&amp;quot;yue&amp;quot;&amp;gt; content &amp;lt;&#x2F;div&amp;gt; &amp;lt;&#x2F;body&amp;gt; &amp;lt;&#x2F;html&amp;gt; No need to use a full-blown template engine for now; we can just use str::replace to replace the content placeholder with the actual content. 😉 Proxy Setup The proxy setup is super simple with shuttle. It’s my second project after zerocal, which is hosted on shuttle and I’m very happy with how smooth the process is. 🚀 Let’s call the app readable: cargo shuttle init --axum --name readable This creates a small Axum app with a simple hello world route. Roadblock No. 1: reqwest When I integrated the readability crate into the project I hit a minor roadblock. I used extractor::scrape just like above and the proxy started locally. 
However when I wanted to fetch a website from the proxy, I got an error: thread &amp;#39;tokio-runtime-worker&amp;#39; panicked at &amp;#39;Cannot drop a runtime in a context where blocking is not allowed. This happens when a runtime is dropped from within an asynchronous context.&amp;#39; This meant that I started a runtime inside a runtime. After checking the source code of the readability crate, I found that it builds a reqwest::blocking::Client and uses that to fetch the URL. After that request, the client is dropped which causes the runtime to be shut down. I fixed this by using a reqwest::Client instead of the reqwest::blocking::Client. &#x2F;&#x2F; reqwest::blocking::Client let client = reqwest::blocking::Client::new(); &#x2F;&#x2F; reqwest::Client let client = reqwest::Client::new(); Now I had the content of the article, but I still needed to pass it to readability. Fortunately they provide a function named extractor::extract that takes something that implements Read and returns the extracted content. However, the reqwest::Response doesn’t implement Read (in contrast to the reqwest::blocking::Response). So I needed to convert it to a Readable type myself. Luckily, the reqwest::Response has a bytes method that returns a Bytes object. The Bytes object implements Read and I can use it to call extractor::extract. let body = client.get(&amp;amp;url).await?.text().await?; let bytes = body.bytes().await?; let response = extractor::extract(&amp;amp;mut res, &amp;amp;url)?; Roadblock No. 2: Routing The app didn’t crash anymore, but I still didn’t get any response. 
My router looked like this: #[shuttle_service::main] async fn axum() -&amp;gt; shuttle_service::ShuttleAxum let router = Router::new().route(&amp;quot;&#x2F;:url&amp;quot;, get(readable)); let sync_wrapper = SyncWrapper::new(router); Ok(sync_wrapper) Turns out that when I use &#x2F;:url as the route, it doesn’t match the path &#x2F;https:&#x2F;&#x2F;example.com because : matches only a single segment up to the first slash. The solution was to use &#x2F;*url instead, which is a wildcard route that matches all segments until the end. Typography and Layout New York Times website (left) vs reader mode (right) For my first prototype I used a CSS framework called yue.css because it was the first thing I found which looked nice. For the final version I ended up mimicking the style of Ruud van Asseldonk’s blog because it always reminded me of reading a well-typeset book. For fonts I chose two of my favorites: Crimson Pro for the body text and JetBrains Mono for the code. Both are licensed under the SIL Open Font License 1.1. You can even use readable from the terminal. lynx https:&#x2F;&#x2F;readable.shuttleapp.rs&#x2F;https:&#x2F;&#x2F;en.wikipedia.org&#x2F;wiki&#x2F;Alan_Turing Caveats The proxy is far from perfect. It’s something I built in a few hours for my personal use. It doesn’t always produce valid HTML. JavaScript is not executed, so some websites don’t work properly. Some might say that’s a feature, not a bug. 😉 That is also true for websites with sophisticated paywalls or bot-detection. A workaround would be to use a headless browser like ScrapingBee or Browserless, but I didn’t want to add that complexity to the project. The readability library takes a lot of freedom in formatting the document however it pleases. It can sometimes produce weird results. For example, it loves to mangle code blocks. Credits I was not the first person to build a readability proxy. I found out about readable-proxy when I did my research, but the project seems to be abandoned. 
Nevertheless it was nice to see that others had the same need. Thanks to Ruud van Asseldonk for open sourcing his blog. 🙏 His writing and documentation are always a great source of inspiration to me. Conclusion The browser on my old Kobo eBook reader using the readability proxy. In times where the most popular browser might kill off ad blockers, a little service for reading articles without ads or tracking can come in handy. I’m not saying you should use it to send all your traffic through it, but it’s a nice tool to have in your toolbox for a rainy day, a warm drink and a great article. ☕ Feel free to deploy your own instance of readable or use the one I’m hosting. The source code is available on GitHub. Maybe one of you wants to help me maintain it. " + }, + { + "title": "zerocal - A Serverless Calendar App in Rust Running on shuttle.rs", + "url": "https://endler.dev/2022/zerocal/", + "body": " Every once in a while my buddies and I meet for dinner. I value these evenings, but the worst part is scheduling these events! We send out a message to the group. We wait for a response. We decide on a date. Someone sends out a calendar invite. Things finally happen. None of that is fun except for the dinner. Being the reasonable person you are, you would think: “Why don’t you just use a scheduling app?”. I have tried many of them. None of them are any good. They are all… too much! Just let me send out an invite and whoever wants can show up. I don’t want to have to create an account for your calendar&#x2F;scheduling&#x2F;whatever app. I don’t want to have to add my friends. I don’t want to have to add my friends’ friends. I don’t want to have to add my friends’ friends’ friends. You get the idea: I just want to send out an invite and get no response from you. The nerdy, introvert engineer’s solution 💡 What we definitely need is yet another calendar app which allows us to create events and send out an invite with a link to that event! 
You probably didn’t see that coming now, did you? Oh, and I don’t want to use Google Calendar to create the event because I don’t trust them. Like any reasonable person, I wanted a way to create calendar entries from my terminal. That’s how I pitched the idea to my buddies last time. The answer was: “I don’t know, sounds like a solution in search of a problem.” But you know what they say: Never ask a starfish for directions. Show, don’t tell That night I went home and built a website that would create a calendar entry from GET parameters. It allows you to create a calendar event from the convenience of your command line: &amp;gt; curl https:&#x2F;&#x2F;zerocal.shuttleapp.rs?start=2022-11-04+20:00&amp;amp;duration=3h&amp;amp;title=Birthday&amp;amp;description=paaarty BEGIN:VCALENDAR VERSION:2.0 PRODID:ICALENDAR-RS CALSCALE:GREGORIAN BEGIN:VEVENT DTSTAMP:20221002T123149Z CLASS:CONFIDENTIAL DESCRIPTION:paaarty DTEND:20221002T133149Z DTSTART:20221002T123149Z SUMMARY:Birthday UID:c99dd4bb-5c35-4d61-9c46-7a471de0e7f4 END:VEVENT END:VCALENDAR You can then save that to a file and open it with your calendar app. &amp;gt; curl https:&#x2F;&#x2F;zerocal.shuttleapp.rs?start=2022-11-04+20:00&amp;amp;duration=3h&amp;amp;title=Birthday&amp;amp;description=paaarty &amp;gt; birthday.ics &amp;gt; open birthday.ics In a sense, it’s a “serverless calendar app”, haha. There is no state on the server, it just generates a calendar event on the fly and returns it. How I built it You probably noticed that the URL contains “shuttleapp.rs”. That’s because I’m using shuttle.rs to host the website. Shuttle is a hosting service for Rust projects and I wanted to try it out for a long time. 
To initialize the project using the awesome axum web framework, I’ve used cargo install cargo-shuttle cargo shuttle init --axum --name zerocal zerocal and I was greeted with everything I needed to get started: use axum:: routing::get, Router ; use sync_wrapper::SyncWrapper; async fn hello_world() -&amp;gt; &amp;amp;&amp;#39;static str &amp;quot;Hello, world!&amp;quot; #[shuttle_service::main] async fn axum() -&amp;gt; shuttle_service::ShuttleAxum let router = Router::new().route(&amp;quot;&#x2F;hello&amp;quot;, get(hello_world)); let sync_wrapper = SyncWrapper::new(router); Ok(sync_wrapper) Let’s quickly commit the changes: git add .gitignore Cargo.toml src&#x2F; git commit -m &amp;quot;Hello World&amp;quot; To deploy the code, I needed to sign up for a shuttle account. This can be done over at https:&#x2F;&#x2F;www.shuttle.rs&#x2F;login. It will ask you to authorize it to access your Github account. Then: cargo shuttle login and finally: cargo shuttle deploy Now let’s head over to zerocal.shuttleapp.rs: Hello World! Deploying the first version took less than 5 minutes. Neat! We’re all set for our custom calendar app. Writing the app To create the calendar event, I used the icalendar crate (shout out to hoodie for creating this nice library!). iCalendar is a standard for creating calendar events that is supported by most calendar apps. cargo add icalendar cargo add chrono # For date and time parsing Let’s create a demo calendar event: let event = Event::new() .summary(&amp;quot;test event&amp;quot;) .description(&amp;quot;here I have something really important to do&amp;quot;) .starts(Utc::now()) .ends(Utc::now() + Duration::days(1)) .done(); Simple enough. How to return a file!? Now that we have a calendar event, we need to return it to the user. But how do we return it as a file? There’s an example of how to return a file dynamically in axum here. 
async fn calendar() -&amp;gt; impl IntoResponse let ical = Calendar::new() .push( &#x2F;&#x2F; add an event Event::new() .summary(&amp;quot;It works! 😀&amp;quot;) .description(&amp;quot;Meeting with the Rust community&amp;quot;) .starts(Utc::now() + Duration::hours(1)) .ends(Utc::now() + Duration::hours(2)) .done(), ) .done(); CalendarResponse(ical) Some interesting things to note here: Every calendar file is a collection of events so we wrap the event in a Calendar object, which represents the collection. impl IntoResponse is a trait that allows us to return any type that implements it. CalendarResponse is a newtype wrapper around Calendar that implements IntoResponse. Here is the CalendarResponse implementation: &#x2F;&#x2F;&#x2F; Newtype wrapper around Calendar for `IntoResponse` impl #[derive(Debug)] pub struct CalendarResponse(pub Calendar); impl IntoResponse for CalendarResponse fn into_response(self) -&amp;gt; Response let mut res = Response::new(boxed(Full::from(self.0.to_string()))); res.headers_mut().insert( header::CONTENT_TYPE, HeaderValue::from_static(&amp;quot;text&#x2F;calendar&amp;quot;), ); res We just create a new Response object and set the Content-Type header to the correct MIME type for iCalendar files: text&#x2F;calendar. Then we return the response. Add date parsing This part is a bit hacky, so feel free to glance over it. We need to parse the date and duration from the query string. I used dateparser, because it supports sooo many different date formats. 
async fn calendar(Query(params): Query&amp;lt;HashMap&amp;lt;String, String&amp;gt;&amp;gt;) -&amp;gt; impl IntoResponse let mut event = Event::new(); event.class(Class::Confidential); if let Some(title) = params.get(&amp;quot;title&amp;quot;) event.summary(title); else event.summary(DEFAULT_EVENT_TITLE); if let Some(description) = params.get(&amp;quot;description&amp;quot;) event.description(description); else event.description(&amp;quot;Powered by zerocal.shuttleapp.rs&amp;quot;); if let Some(start) = params.get(&amp;quot;start&amp;quot;) let start = dateparser::parse(start).unwrap(); event.starts(start); if let Some(duration) = params.get(&amp;quot;duration&amp;quot;) let duration = humantime::parse_duration(duration).unwrap(); let duration = chrono::Duration::from_std(duration).unwrap(); event.ends(start + duration); if let Some(end) = params.get(&amp;quot;end&amp;quot;) let end = dateparser::parse(end).unwrap(); event.ends(end); if let Some(duration) = params.get(&amp;quot;duration&amp;quot;) if params.get(&amp;quot;start&amp;quot;).is_none() let duration = humantime::parse_duration(duration).unwrap(); let duration = chrono::Duration::from_std(duration).unwrap(); event.starts(end - duration); let ical = Calendar::new().push(event.done()).done(); CalendarResponse(ical) Would be nice to support more date formats like now and tomorrow, but I’ll leave that for another time. Let’s test it: &amp;gt; cargo shuttle run # This starts a local dev server &amp;gt; curl 127.0.0.1:8000?start=2022-11-04+20:00&amp;amp;duration=3h&amp;amp;title=Birthday&amp;amp;description=Party *🤖 bleep bloop, calendar file created* Nice, it works! Opening it in the browser creates a new event in the calendar: Of course, it also works on Chrome, but you do support the open web, right? And for all the odd people who don’t use a terminal to create a calendar event, let’s also add a form to the website. 
Add a form &amp;lt;form&amp;gt; &amp;lt;table&amp;gt; &amp;lt;tr&amp;gt; &amp;lt;td&amp;gt; &amp;lt;label for=&amp;quot;title&amp;quot;&amp;gt;Event Title&amp;lt;&#x2F;label&amp;gt; &amp;lt;&#x2F;td&amp;gt; &amp;lt;td&amp;gt; &amp;lt;input type=&amp;quot;text&amp;quot; id=&amp;quot;title&amp;quot; name=&amp;quot;title&amp;quot; value=&amp;quot;Birthday&amp;quot; &#x2F;&amp;gt; &amp;lt;&#x2F;td&amp;gt; &amp;lt;&#x2F;tr&amp;gt; &amp;lt;tr&amp;gt; &amp;lt;td&amp;gt; &amp;lt;label for=&amp;quot;desc&amp;quot;&amp;gt;Description&amp;lt;&#x2F;label&amp;gt; &amp;lt;&#x2F;td&amp;gt; &amp;lt;td&amp;gt; &amp;lt;input type=&amp;quot;text&amp;quot; id=&amp;quot;desc&amp;quot; name=&amp;quot;desc&amp;quot; value=&amp;quot;Party&amp;quot; &#x2F;&amp;gt; &amp;lt;&#x2F;td&amp;gt; &amp;lt;&#x2F;tr&amp;gt; &amp;lt;tr&amp;gt; &amp;lt;td&amp;gt;&amp;lt;label for=&amp;quot;start&amp;quot;&amp;gt;Start&amp;lt;&#x2F;label&amp;gt;&amp;lt;&#x2F;td&amp;gt; &amp;lt;td&amp;gt; &amp;lt;input type=&amp;quot;datetime-local&amp;quot; id=&amp;quot;start&amp;quot; name=&amp;quot;start&amp;quot; &#x2F;&amp;gt; &amp;lt;&#x2F;td&amp;gt; &amp;lt;&#x2F;tr&amp;gt; &amp;lt;tr&amp;gt; &amp;lt;td&amp;gt;&amp;lt;label for=&amp;quot;end&amp;quot;&amp;gt;End&amp;lt;&#x2F;label&amp;gt;&amp;lt;&#x2F;td&amp;gt; &amp;lt;td&amp;gt; &amp;lt;input type=&amp;quot;datetime-local&amp;quot; id=&amp;quot;end&amp;quot; name=&amp;quot;end&amp;quot; &#x2F;&amp;gt; &amp;lt;&#x2F;td&amp;gt; &amp;lt;&#x2F;tr&amp;gt; &amp;lt;&#x2F;table&amp;gt; &amp;lt;&#x2F;form&amp;gt; I modified the calendar function a bit to return the form if the query string is empty: async fn calendar(Query(params): Query&amp;lt;HashMap&amp;lt;String, String&amp;gt;&amp;gt;) -&amp;gt; impl IntoResponse &#x2F;&#x2F; if query is empty, show form if params.is_empty() return Response::builder() .status(200) .body(boxed(Full::from(include_str!(&amp;quot;..&#x2F;static&#x2F;index.html&amp;quot;)))) .unwrap(); &#x2F;&#x2F; ... 
After some more tweaking, we got ourselves a nice little form in all of its web 1.0 glory: The form And that’s it! We now have a little web app that can create calendar events. Well, almost. We still need to deploy it. Deploying cargo shuttle deploy Right, that’s all. It’s that easy. Thanks to the folks over at shuttle.rs for making this possible. The calendar app is now available at zerocal.shuttleapp.rs. Now I can finally send my friends a link to a calendar event for our next pub crawl. They’ll surely appreciate it.yeahyeah From zero to calendar in 100 lines of Rust Boy it feels good to be writing some plain HTML again. Building little apps never gets old. Check out the source code on GitHub and help me make it better! 🙏 Here are some ideas: ✅ Add location support (e.g. location=Berlin or location=https:&#x2F;&#x2F;zoom.us&#x2F;test). Thanks to sigaloid. Add support for more human-readable date formats (e.g. now, tomorrow). Add support for recurring events. Add support for timezones. Add Google calendar short-links (https:&#x2F;&#x2F;calendar.google.com&#x2F;calendar&#x2F;render?action=TEMPLATE&amp;amp;dates=20221003T224500Z%2F20221003T224500Z&amp;amp;details=&amp;amp;location=&amp;amp;text=). Add example bash command to create a calendar event from the command line. Shorten the URL (e.g. zerocal.shuttleapp.rs&#x2F;2022-11-04T20:00&#x2F;3h&#x2F;Birthday&#x2F;Party)? Check out the issue tracker and feel free to open a PR! " + }, + { + "title": "The Uber of Poland", + "url": "https://endler.dev/2021/uber-of-poland/", + "body": "A few years ago I visited a friend in Gdańsk, Poland. As we explored the city, one thing I noticed was that cabs were relatively expensive and there was no Uber. Instead, most (young) people used a community-organized service called Night Riders. I couldn’t find anything about that service on the web, so I decided to write about it to preserve its history. 
Delightfully Low-Tech What fascinated me about Night Riders was the way the service operated — completely via WhatsApp: you post a message in a group chat and one of the free riders would reply with a 👍 emoji. With that, your ride was scheduled. You’d pay through PayPal or cash. In these days of venture-backed startups that need millions in capital before they turn a profit, this approach is decidedly antagonistic. Basically, Night Riders built on top of existing infrastructure instead of maintaining their own ride-hailing platform, sign-up process, or even website. The service would grow solely by word of mouth. Using existing infrastructure meant that it was extremely cheap to run and there were almost zero upfront costs without a single line of code to write. It simply solved the customer’s problem in the most straightforward way possible. Of course, there are legal issues regarding data protection, labor law or payment processing, but the important bit is that they had paying customers from day one. The rest is easier to solve than a lack of product market fit. In Defense of Clones Uber and Lyft can’t be everywhere from the start. While they expand their businesses, others have the ability to outpace them. There’s an Uber clone in China (DiDi), one in Africa and the Middle East (Careem) and basically one for every country in the world. The tech industry rarely talks about these Ubers of X, but they serve millions of customers. While they start as exact copies of their well-known counterparts, some of them end up offering better service thanks to their understanding of the local market. People always find a way With creativity, you can provide great service even without a big budget. The important part is to know which corners you can cut while staying true to your mission. If there’s a market, there’s a way. The Cubans have a word for it: resolver, which means “we’ll figure it out”. 
" + }, + { + "title": "How Does The Unix `history` Command Work?", + "url": "https://endler.dev/2021/history/", + "body": " Source: Cozy attic created by vectorpouch and tux created by catalyststuff — freepik.com As the day is winding down, I have a good hour just to myself. Perfect time to listen to some Billie Joel (it’s either Billie Joel or Billie Eilish for me these days) and learn how the Unix history command works. Life is good. Learning what makes Unix tick is a bit of a hobby of mine. I covered yes, ls, and cat before. Don’t judge. How does history even work? Every command is tracked, so I see the last few commands on my machine when I run history. ❯❯❯ history 8680 cd endler.dev 8682 cd content&#x2F;2021 8683 mkdir history 8684 cd history 8685 vim index.md Yeah, but how does it do that? The manpage on my mac is not really helpful — I also couldn’t find much in the first place. I found this article (it’s good etiquette nowadays to warn you that this is a Medium link) and it describes a bit of what’s going on. Every command is stored in $HISTFILE, which points to ~&#x2F;.zsh_history for me. ❯❯❯ tail $HISTFILE : 1586007759:0;cd endler.dev : 1586007763:0;cd content&#x2F;2021 : 1586007771:0;mkdir history : 1586007772:0;cd history : 1586007777:0;vim index.md ... So let’s see. We got a : followed by a timestamp followed by :0, then a separator (;) and finally the command itself. Each new command gets appended to the end of the file. Not too hard to recreate. Hold on, what’s that 0 about!? It turns out it’s the command duration, and the entire thing is called the extended history format: : &amp;lt;beginning time&amp;gt;:&amp;lt;elapsed seconds&amp;gt;;&amp;lt;command&amp;gt; (Depending on your settings, your file might look different.) Hooking into history But still, how does history really work. It must run some code whenever I execute a command — a hook of some sort! 💥 Swoooooosh 💥 Matthias from the future steps out of a blinding ball of light: Waaait! 
That’s not really how it works! It turns out that shells like bash and zsh don’t actually call a hook for history. Why should they? When history is a shell builtin, they can just track the commands internally. Thankfully my editor-in-chief and resident Unix neckbeard Simon Brüggen explained that to me — but only after I sent him the first draft for this article. 😓 As such, the next section is a bit like Lord of the Rings: a sympathetic but naive fellow on a questionable mission with no clue of what he’s getting himself into. In my defense, Lord of the Rings is also enjoyed primarily for its entertainment value, not its historical accuracy…. and just like in this epic story, I promise we’ll get to the bottom of things in the end. I found add-zsh-hook and a usage example in atuin’s source code. I might not fully comprehend all that is written there, but I’m a man of action, and I can take a solid piece of work and tear it apart. It’s not much, but here’s what I got: # Source this in your ~&#x2F;.zshrc autoload -U add-zsh-hook _past_preexec() echo &amp;quot;preexec&amp;quot; _past_precmd() echo &amp;quot;precmd&amp;quot; add-zsh-hook preexec _past_preexec add-zsh-hook precmd _past_precmd This sets up two hooks: the first one gets called right before a command gets executed and the second one directly after. (I decided to call my little history replacement past. I like short names.) Okay, let’s tell zsh to totally run this file whenever we execute a command: source src&#x2F;shell&#x2F;past.zsh …aaaaaand ❯❯❯ date preexec Fri May 28 18:53:55 CEST 2021 precmd It works! ✨ How exciting! ✨ Actually, I just remember now that I did the same thing for my little environment settings manager envy over two years ago, but hey! So what to do with our newly acquired power? Let’s Run Some Rust Code Here’s the thing: only preexec gets the “real” command. 
precmd gets nothing: _past_preexec() echo &amp;quot;preexec $@&amp;quot; _past_precmd() echo &amp;quot;precmd $@&amp;quot; $@ means “show me what you got” and here’s what it got: ❯❯❯ date preexec date date date Fri May 28 19:02:11 CEST 2021 precmd Shouldn’t one “date” be enough? Hum… let’s look at the zsh documentation for preexec: If the history mechanism is active […], the string that the user typed is passed as the first argument, otherwise it is an empty string. The actual command that will be executed (including expanded aliases) is passed in two different forms: the second argument is a single-line, size-limited version of the command (with things like function bodies elided); the third argument contains the full text that is being executed. I don’t know about you, but the third argument should be all we ever need? 🤨 Checking… ❯❯❯ ls -l preexec ls -l lsd -l lsd -l (Shout out to lsd, the next-gen ls command ) Alright, good enough. Let’s parse $3 with some Rust code and write it to our own history file. use std::env; use std::error::Error; use std::fs::OpenOptions; use std::io::Write; const HISTORY_FILE: &amp;amp;str = &amp;quot;lol&amp;quot;; fn main() -&amp;gt; Result&amp;lt;(), Box&amp;lt;dyn Error&amp;gt;&amp;gt; let mut history = OpenOptions::new() .create(true) .append(true) .open(HISTORY_FILE)?; if let Some(command) = env::args().nth(3) writeln!(history, &amp;quot; &amp;quot;, command)?; ; Ok(()) ❯❯❯ cargo run -- dummy dummy hello ❯❯❯ cargo run -- dummy dummy world ❯❯❯ cat lol hello world We’re almost done — at least if we’re willing to cheat a bit. 
😏 Let’s hardcode that format string: use std::env; use std::error::Error; use std::fs::OpenOptions; use std::io::Write; use std::time::SystemTime; const HISTORY_FILE: &amp;amp;str = &amp;quot;lol&amp;quot;; fn timestamp() -&amp;gt; Result&amp;lt;u64, Box&amp;lt;dyn Error&amp;gt;&amp;gt; let n = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?; Ok(n.as_secs()) fn main() -&amp;gt; Result&amp;lt;(), Box&amp;lt;dyn Error&amp;gt;&amp;gt; let mut history = OpenOptions::new() .create(true) .append(true) .open(HISTORY_FILE)?; if let Some(command) = env::args().nth(3) writeln!(history, &amp;quot;: :0; &amp;quot;, timestamp()?, command)?; ; Ok(()) Now, if we squint a little, it sorta kinda writes our command in my history format. (That part about the Unix timestamp was taken straight from the docs. Zero regrets.) Remember when I said that precmd gets nothing? I lied. In reality, you can read the exit code of the executed command (from $?). That’s very helpful, but we just agree to ignore that and never talk about it again. With this out of the way, our final past.zsh hooks file looks like that: autoload -U add-zsh-hook _past_preexec() past $@ add-zsh-hook preexec _past_preexec Now here comes the dangerous part! Step back while I replace the original history command with my own. Never try this at home. (Actually I’m exaggerating a bit. Feel free to try it. Worst thing that will happen is that you’ll lose a bit of history, but don’t sue me.) First, let’s change the path to the history file to my real one: &#x2F;&#x2F; You should read the $ HISTFILE env var instead ;) const HISTORY_FILE: &amp;amp;str = &amp;quot;&#x2F;Users&#x2F;mendler&#x2F;.zhistory&amp;quot;; Then let’s install past: ❯❯❯ cargo install --path . # bleep bloop... After that, it’s ready to use. Let’s add that bad boy to my ~&#x2F;.zshrc: source &amp;quot;&#x2F;Users&#x2F;mendler&#x2F;Code&#x2F;private&#x2F;past&#x2F;src&#x2F;shell&#x2F;past.zsh&amp;quot; And FINALLY we can test it. 
We open a new shell and run a few commands followed by history: ❯❯❯ date ... ❯❯❯ ls ... ❯❯❯ it works ... ❯❯❯ history 1011 date 1012 ls 1013 it works ✨ Yay. ✨ The source code for past is on Github. How it really really works Our experiment was a great success, but I since learned that reality is a bit different. “In early versions of Unix the history command was a separate program”, but most modern shells have history builtin. zsh tracks the history in its main run loop. Here are the important bits. (Assume all types are in scope.) Eprog prog; &#x2F;* Main zsh run loop *&#x2F; for (;;) &#x2F;* Init history *&#x2F; hbegin(1); if (!(prog = parse_event(ENDINPUT))) &#x2F;* Couldn&amp;#39;t parse command. Stop history *&#x2F; hend(NULL); continue; &#x2F;* Store command in history *&#x2F; if (hend(prog)) LinkList args; args = newlinklist(); addlinknode(args, hist_ring-&amp;gt;node.nam); addlinknode(args, dupstring(getjobtext(prog, NULL))); addlinknode(args, cmdstr = getpermtext(prog, NULL, 0)); &#x2F;* Here&amp;#39;s the preexec hook that we used. * It gets passed all the args we saw earlier. *&#x2F; callhookfunc(&amp;quot;preexec&amp;quot;, args, 1, NULL); &#x2F;* Main routine for executing a command *&#x2F; execode(prog); The history lines are kept in a hash, and also in a ring-buffer to prevent the history from getting too big. (See here.) That’s smart! Without the ring-buffer, a malicious user could just thrash the history with random commands until a buffer overflow is triggered. I never thought of that. History time (see what I did there?) The original history command was added to the Unix C shell (csh) in 1978. Here’s a link to the paper by Bill Joy (hey, another Bill!). He took inspiration from the REDO command in Interlisp. You can find its specification in the original Interlisp manual in section 8.7. Lessons learned Rebuild what you don’t understand. The history file is human-readable and pretty straightforward. 
The history command is a shell builtin, but we can use hooks to write our own. Fun fact: Did you know that in zsh, history is actually just an alias for fc -l? More info here or check out the source code. “What I cannot create, I do not understand” — Richard Feynman " + }, + { + "title": "Spreadsheets Make For Great Business Ideas", + "url": "https://endler.dev/2021/first-business/", + "body": "One of the best decisions I made in 2020 was to open my calendar to everyone. People book appointments to chat about open-source projects, content creation, and business ideas. When we discuss business ideas, the conversation often leans towards problems suited for startups, such as using artificial intelligence to find clothes that fit or building a crowdfunding platform on the blockchain. While these are exciting ideas, they require significant persistence and deep pockets. It might be easier and less risky to join an existing startup in that domain. In reality, most people are simply looking for something cool to work on and to make their customers happy. It turns out you don’t need to run a startup to achieve that (and you probably shouldn’t). Instead, starting a side project is less risky and can organically grow into a business over time. Often, the solution is right in front of them: hidden within an Excel spreadsheet on their computer. I Hate Excel I spend as little time in Excel as possible, only engaging with it when absolutely necessary. My focus is on getting tasks done quickly, not on layout or design; I’d rather pay someone to do that work for me. And this is precisely my point! The spreadsheets and lists you create to solve your own problems can also solve someone else’s. This represents a business opportunity! This approach has several advantages: 💪 It solves a real problem. 🥱 It’s mundane, so people might pay to avoid doing it themselves. ⚡️ It wastes no time on design or infrastructure, embodying the ultimate MVP. 🐢 It’s low-tech: no programming required. 
You can start with Notion and Super.so. 🐜 It targets a niche market: if there were an established service, you’d already be using it. Big corporations won’t compete with you. 🚀 It allows you to spend less time building and more time engaging with potential customers. Examples A few years ago, while researching static code analysis tools, I compiled a list, pushed it to GitHub, and moved on. Fast forward, and that side project now generates revenue from sponsors and consulting gigs. Another example is a person who created a spreadsheet for remote work locations, shared it on Twitter, and then developed a website from it. The website is NomadList, and its creator, Pieter Levels, now earns $300k&#x2F;year. “Instead of building a site first, I simply made [a] public Google spreadsheet to collect the first data and see if there’d be interest for this at all.” — Pieter Levels on how he created NomadList. I’ve left a spot for your story here. Now, refine that spreadsheet (or list), share it with your friends, iterate based on their feedback, and build your first business. " + }, + { + "title": "Starting A Print-On-Demand Business As A Software Engineer", + "url": "https://endler.dev/2021/codeprints/", + "body": "One day I had the idea to make a print of my Github timeline. I liked the thought of bringing something “virtual” into the real world. 😄 So I called up my friend Wolfgang and we built codeprints. It’s my first “physical” product, so I decided to share my learnings. Felix Krause of fastlane fame was one of our first customers and we are very thankful for this tweet promoting our service, which gave us a huge traffic boost. Launching Is Hard, So Launch Early Even though I knew that launching early was vital, I still didn’t want to “commit” to the final design shortly before the planned go-live. There was always that last bug to fix or that little extra feature to implement. 
For example, I wanted to offer two designs&#x2F;layouts: the classic Github contribution timeline and a graph-based design for repositories. In cases like that, it helps to have a co-founder. Wolfgang convinced me that multiple layouts were not needed for the MVP and that whatever we’d come up with would probably be wrong anyway without getting early user feedback. He was right. Without Wolfgang, the shop would probably still not be live today. We have a much clearer vision now of what people want to see, thanks to launching early. Turns out users were not really interested in the graph-based design after all, and it would have been a waste of time to create it. Lesson learned: Even if you know all the rules for building products, it’s different when applying them in practice for the first time. We’ll probably never be completely happy with the shop functionality, but it’s better to launch early and make incremental improvements later. Software Development Is Easy When we started, my main concern was software development. The frontend and the backend needed to be coded and work together. We didn’t want to run into Github rate-limiting issues in case there were many users on the site. I was also thinking a lot about which web frontend to use. Should we build it in Rust using Yew or better go with Gatsby? Turns out writing the code is the easy part. Being software engineers, it didn’t take us too long to implement the backend API and we quickly found a decent template for the frontend. Most of our time was spent thinking about the product, the user experience, financing, taxes, the shipping process, marketing, and integrating customer feedback. These were all things I had (and still have) little experience in. Wolfgang suggested to “just use Shopify and the default template” to get started quickly. In hindsight, it was the absolute right decision. 
I always thought Shopify was for simple mom-and-pop stores, but it turns out it’s highly customizable, integrates well with pretty much anything, and offers excellent tooling like themekit. Payments, refunds, discounts, customer analytics: it’s all built into the platform. It saved us sooo much development time. Lesson learned: There are many unknown unknowns — things we are neither aware of nor understand — when starting a project. Try to get to the root of the problem as soon as possible to save time and avoid the sunk cost fallacy. Users Expect Great UI&#x2F;UX Giants like Amazon, Facebook, and Netflix have raised customer expectations for great UX. They spend millions polishing their websites and getting every detail right. As a result, their sites work just right for millions of customers and on every device. An indie shop does not have these resources. Nevertheless, many customers expect the same quality user experience as on other sites they use. Being on the other side of the fence for the first time, I learned how hard it is to build a user interface that works for 90% of the people. Every little detail — like the order of form fields — makes a huge difference. Get too many details wrong, and you lose a customer. Those things can only be found by watching real users use your product. I promise you, it will be eye-opening! Lesson learned: Watch potential customers use your service. It will be painful at first, but will improve the quality of your product. Use standard frameworks for shops if you can because they get many UI&#x2F;UX details right out of the box. WooCommerce or Shopify come to mind. Building Products Means Being Pragmatic We have many ideas for future products. Many friends and customers tell us about potential features all the time, but the problem is how to prioritize them. 
Most ideas won’t work at scale: It’s tricky to find a supplier that has a product on offer, is cheap, ships worldwide, and has a working integration with your shop-system. So we have to regularly scrap product ideas, simply because our suppliers’ support is not there. On top of that, we run the business next to our day job and other responsibilities, so we need to make use of our time as efficiently as possible. Lesson learned: Making services look effortless is hard work. Time is your biggest constraint. You’ll have to say “no” more often than you can say “yes”. Due to the pandemic, codeprints was entirely built remotely. More people should give whereby a try. Getting Traction As A Small Business It has never been easier to launch a shop. Services like Shopify, Stripe, and a host of suppliers make starting out a breeze. On the other hand, there is a lot more competition now that the barrier to entry is so low. Thousands of services are constantly competing for our attention. On top of that, most customers just default to big platforms like Amazon, AliExpress, or eBay for their shopping needs these days, and search engines send a big chunk of the traffic there. Since our product is custom-made, we can not offer it on those bigger platforms. As an indie shop, we get most visitors through word of mouth, exceptional customer support, and advertising where developers hang out: Twitter, Reddit, HackerNews, Lobste.rs, and friends. It’s essential to focus on providing value on those platforms; a plain marketing post won’t get you any attention. Other platforms like LinkedIn, Facebook, ProductHunt, or IndieHackers could also work, but our target audience (OSS developers with an active Github profile) doesn’t hang out there that much. Lesson learned: Always know where your customers are and understand their needs. Finding A Niche Is Only Half The Job Common market wisdom is to find a niche and grow from within. 
With codeprints we definitely found our niche: the audience is very narrow but interested in our geeky products. There are 56 million developers on Github today; that’s a big target audience. Most profiles are not very active, though. To make a print look attractive, you’d have to consistently commit code over a long period of time — many years. If we assume that only 1% of devs are active, that limits our target audience to 560.000 users. That’s still a big but much smaller market. Now, if only 1% of these people find the shop and order something (which would be quite a good ratio), we’re looking at 5.600 orders total. Not that much! In order to extend that audience, one could either increase the number of potential customers or focus on getting more of the existing potential customers on the page. In our case, we expanded by offering a one-year layout, reducing the required level of Github activity for a cool print. We are also working on making emptier profiles look more interesting and highlighting the value-producing part of open source contribution. Every contribution counts — no matter how tiny. Lesson learned: Make sure that your niche market is not too narrow so that you can make a sustainable business out of it. Early adopters like Orta Therox are incredibly precious when starting out. Not everybody has a rockstar profile like that, though (and that’s fine). Make User Feedback Actionable Initial customer feedback is precious. You should focus on every word these customers say as they believe in your product and want you to win. (They voted with their wallet after all.) Feedback from friends is helpful, too, but I usually apply a bigger filter to that. Not all of my friends are software developers, and while they all mean well, what they tell me might be different from what they mean. It’s like they are asking for faster horses when what they really want is a car. Feedback on social media can be… snarky at times; be prepared for that! 
Your job is to find the grain of truth in every statement and focus on constructive advice. For example, take this feedback we got: How lazy can someone be to pay €36 for this. You could turn it around to make it constructive: Can I get a cheaper version to print myself? And that is some valuable feedback. We could provide a downloadable version in the future! Lesson learned: It takes practice to extract actionable feedback from user input and make it fit your product vision. Summary 2020 was a crazy year. I helped launch two small side-businesses, codeprints and analysis-tools.dev. Both have an entirely different revenue model, but they have one thing in common: they were super fun to build! 🤩 It’s motivating to look back at those achievements sometimes… That print of 2020 pretty much encapsulates those feelings for me. (Note the greener spots in August and September, which is when we launched analysis-tools and the days in December when we built codeprints.) My coding year in review using our new vertical layout.Here’s to building more products in 2021. Let me know if you found that post helpful and reach out if you have questions. Oh and if you’re looking for a unique way to decorate your home office, why not get your own print from codeprints? 😊 P.S.: If you’re a product owner and you’re looking for a unique present for your team, get in contact and be the first to get an invite to a private beta. " + }, + { + "title": "So You Want To Earn Money With Open Source", + "url": "https://endler.dev/2021/oss-money/", + "body": "I earned 0 Euros from maintaining OSS software for years, and I thought that’s the way things are. I finally looked into ways to monetize my projects last year and in this talk I want to share what I learned so far. It didn’t make me rich (yet!), but I built my first sustainable side-project with analysis-tools.dev ✨. I’ll talk about this and other projects and the mistakes I made on the road towards sustainability. 
document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Related links and resources: Podcast by Caleb Porzio about why building a business around Github sponsors is so hard. The Changelog Podcast — It’s OK to make money from your open source with Zeno Rocha. Nadia Eghbal talking about Maintenance of our essential info-structure. She also wrote a book on that topic called Working in Public. Generating income from open source by Vadim Demedes, which explains different ways to monetize your projects based on various real-world examples. Find a full transcript of the talk below. (Sorry for the wall of text.) This is my talk about earning money with Open Source, which I gave at the Web Engineering Meetup Aachen at the end of 2020. The organizers gladly allowed me to share it on my YouTube channel. I’m basically trying to answer the question: “Why am I not making 100k on Github?”. I’m talking about finding corporate sponsors for myself and the long road towards sustainability of open-source maintenance. You might not even want to start. This is a talk for those people that have the mindset that it’s probably not worth it to spend that much effort on Open Source if it takes so long until you find success. Now, this talk turned out to be a little grim. I had this very motivational talk in mind, but in reality, it’s hard, and by hard, I mean it’s really hard. I just want to get this point across and maybe still motivate you to do it but first: why am I entitled to talk about this? I’ve been doing Open Source for over 10 years now. This is a talk dedicated to my former self maybe 15 years ago. I work at trivago, which is a hotel search company based in Düsseldorf. I have a blog at endler.dev. Like everyone and their mom, I also have a YouTube channel. It’s called Hello, Rust! and I’m extremely active with one video every two years. Hence, you definitely want to subscribe to not miss any updates. 
But today, I want to talk about Open Source, and I have a very sophisticated outline with two points: my journey and revenue models. Let’s go back all the way to 2010. The world definitely looked a bit different back then. Github in 2010 This was Github, and I was a bit late to the game. I joined in January 2010, and by then, Github was already two years old, so my username was taken. I usually go by the handle mre on platforms, and I noticed that this handle was not used by anyone, so I just sent a mail to support and asked if I could have it, and then I got an answer from this guy saying “go for it.” It was Chris Wanstrath, who goes by the handle defunkt, and he’s the former CEO of Github, and at this point in time, I was hooked. I really liked the platform. I really liked how they worked very hands-on with Open Source. I used it for some projects of mine; you can see in the screenshot that I uploaded my blog, for example, because they host it for free. It was built with Jekyll, and you just push it to their site. Then they statically generate it, and it’s done. It goes without saying that nothing has changed in the last 10 years because my blog more or less still looks like that. It’s not built with jQuery and Jekyll anymore, but with zola and Cloudflare Worker Sites, but it’s more or less the same thing. For preparing for this talk, I wanted to take a step back and see where I was coming from and where I am right now, and probably the best way to do it is to look up some statistics and see if the number of repositories over time would give me some insights. So I queried the Github API for that. You can see it’s pretty much a linear graph from 2010 all the way to 2020. Except for 2018, where I reached peak productivity, it seems, but oh well. In the end, it’s more or less a linear thing, and you might say you put some work in you get some feedback out, but in reality, it’s different. There is a compound effect. 
If we look at my number of stars over time, you can see that more or less it started very slowly, and now it’s sort of growing exponentially, so right now, we are at 25.000 stars across all projects. Another way to look at it would be the number of followers. That’s kind of a new metric to me, but I did look up some statistics from archive.org (because Github doesn’t have that information through their API), and again, it’s more or less exponential growth. You put some work in, but you get a compound effect of your work plus some interest out. This is not luck; it’s work. It means you know what you’re doing. At the same time, there’s the elephant in the room, and that is it’s just a pat on the back. We have earned zero dollars until now, and one question you might have is how do you monetize this effort. First off, is it an effort? Well, I don’t know about you, but I probably spend two or three hours on average per day on Open Source: thinking about Open Source and creating new projects, but also maintaining and code review, so it really is work, and it’s a lot of work, and you more or less do that for free. There’s nothing wrong with doing things for free and doing it as a hobby, but in this case, you are supposed to be working on whatever you like. Open Source is not like that; sometimes you have obligations, and you feel responsible for maybe helping people out, which is a big part of it. You do that next to your regular work, so it can really be a burden. If you don’t know by now, making this somehow valuable is hard, it’s really hard. I want to talk about some ways to build a proper revenue model from Open Source. It goes without saying that this should probably not be your first focus if you saw the graphs before, but once you reach a point where you want to get some revenue, you have a couple of options. I don’t want to talk about doing Open Source as part of your business, and I don’t want to talk about bigger companies and more significant support here. 
I want to focus on a couple things that everyone can do. Sponsoring [on Github] is one. Offer paid learning materials on top of your normal documentation. For example, you might have a video series that you ask for money. Sell merchandising like Mozilla does. Consulting next to your Open Source business Services and plugins like writing an ADFS plugin or high availability functionality are very common examples for paid features targeting enterprises. But let’s start with the basics. Let’s start with point number one, sponsoring. There are two types of sponsoring: the first one is individual donations. Individual sponsoring is what Github Sponsors is all about. If you want to earn money [with that model], you have to think about the funnel, and you have to think about how you capture people’s attention and how you monetize that. It starts with a product, [which] can be anything. From there, you generate interest, and this interest creates an audience, and that audience eventually might pay for your service, and this is actually the entire secret. It’s how you earn money with any product, and with Open Source, if you want to attract sponsors, you build a product people want. If you transfer that to Open Source, building a project is maybe a repository, and the stars indicate the interest of the audience. The audience itself is made out of followers (personal followers or followers of a company), and those followers might or might not become sponsors in the end. Now, I know stars are a terrible metric for popularity because some people use stars differently than others. For example, some use it as bookmarks to check out projects later, others want to thank the developers for maybe putting in a lot of effort, and so on, but it’s a good first estimation. Now, think about the following. Think about the number of stars I have and the followers and the number of sponsors. Think about my “funnel” right now. 
I told you that I have 25.000 stars and roughly 1000 followers, and out of those, I have three sponsors, so the ratio between the stars and sponsors is 0.01%. That looks pretty grim. It means you need around 8.000 stars to attract a single supporter. I was wondering: “maybe it’s just me?”. Maybe the top 1000 Github maintainers did not have that problem. Well, it turns out it’s exactly the same schema. If you take the top 1000 Github maintainers and look at their sponsors, it’s again a pretty grim picture. For example, looking at the median, you look at 3421 followers per person and a median of zero sponsors. That’s zero percent if my math is correct, and if you look at the average, you even have 5430 followers (because Linus Torvalds pushes that number up). You have 2.8 sponsors out of that on average, and that is 0.05%, which is a bit more than I have, but it’s roughly in the same ballpark. Now think about this: Github has 40 million users, so that means the top 1000 maintainers make up 0.0025% of the entire community. The median income of those maintainers on Github is basically zero. That in and of itself is maybe not the biggest problem, but keep in mind that the Github revenue of 2019 was 300 million dollars. I read that comment on Hacker News yesterday: I have sponsors on Github and rake in a cool two dollars per month. It’s obviously less after taxes, so I have to have a day job. So this is clearly not working. You have to think of different ways to monetize Open Source, or you just wait until Github Sponsors becomes more popular – whatever happens first. One way I just want to quickly touch on is the notion of sponsorware. It’s kind of a new concept, and some people haven’t heard of it before. I honestly really like it. Generally speaking, you create a project, and you keep it private. 
You talk about it on Twitter, though, or any other platform, and you say: “hey, I’m building this, but if you want early access, you have to become a sponsor,” and once you reach a certain threshold of sponsors, or income or whatever, then you make the project public. This initial example that I showed you, where someone was earning 100k on Open Source, is from someone doing just that. He’s building products and services, talks about them, and then makes them open for everyone in the end. This has some advantages: first off, you get early feedback from people that really believe in your mission. Second, you don’t have to work for free all the time, and third, you might also create an audience and hype from your projects. The disadvantage is that if you are a hardcore Open Source or free software believer, this goes against your ethic. You want the software to be open, to begin with, without any additional requirements. So you really have to make up your own mind about that. I tried, and I have an early access program, which I only share with sponsors. [My first sponsorware was a] tool for getting Github statistics. [The statistics from this talk were] created with that tool. I think you need a big audience to pull that off. The question is if you want to put that much effort in, or you just want to make it open in the first place and think about other revenue models. However, I think still it’s a very interesting concept, and we might see that [more] in the future, so you know what it looks like now, and you have a name for it. Another one is corporate sponsoring. This is a double-edged sword because corporate sponsoring means that a company gives you money and sometimes wants something. They might want additional support, or they want the bug to be fixed, and more or less it feels like you are somehow beginning to work for them, but nevertheless, those companies put in quite a big amount of money into Open Source these days. 
Looking at two big companies, Facebook and Google, they invested 177k and 845k respectively into Open Source over their lifetime on Open Collective, a platform for collecting those donations. That’s really great. We need more companies doing that, but also, as a little side note and maybe as a little rant, I believe that those companies are doing way too little. Facebook’s revenue last year was 70 billion, and Google had 160 billion, which is nothing to be ashamed of, so I wonder really if this is the most they can do. Of course, Google, for example, also donated to other projects like Mozilla, and they also organize meetups and so on. But do you really think that Facebook and Google would exist today if there was no Python or web server or Linux back in the day when two Stanford students tried to build a search engine? Sometimes I feel that Fortune 500 companies really don’t understand how much they depend on Open Source and how many people depend on a few people who maintain critical parts of our infrastructure. I don’t think they invest nearly enough into Open Source. What a lot of people think is that Open Source works like the panel on the left where you have a full room of engineers trying to figure out the best way to build a project, and in reality, it’s more or less someone working late at night to fix bugs and doing it because they believe in it. The public perception is probably wrong, and it is a really small group of people who maintain critical infrastructure. Sometimes that can lead to very tricky situations. Two of my childhood heroes talked about it openly: Kenneth Reitz is the core maintainer of requests for Python and antirez is the creator of Redis, a key-value store. So one is front-end development and the other one from the backend. They both talk about burnout here because the burden of becoming an Open Source maintainer on a big scale can very much and very quickly lead to burnout. The internet never sleeps. You never go to sleep. 
You always get a ticket, a feature request, a pull request, an issue. You always have something to work on, and on top of that, you have to do all your other responsibilities, so that can lead to burnout really quickly. There was one guy who I also respect deeply. His name is Mark Pilgrim. He is the author of Dive Into Python, and he once pulled a 410 for deleting everything [about him] on the internet. There’s actually a term for it: infocide for “information suicide.” He got fed up with the ecosystem, and if you think about the Ruby community, you might remember _why, the author of the Poignant Guide to Ruby. He did kind of the same thing. Focusing on what antirez has said, “once I started to receive money to work at Redis, it was no longer possible for my ethics to have my past pattern, so I started to force myself to work on the normal schedules. This, for me, is a huge struggle for many years. At this point, moreover, I’m sure I’m doing less than I could, because of that, but this is how things work”, so it feels like he feels guilty for maybe being forced into that work schedule and maybe not performing well enough. There are some signs of burnout for me somehow, and it’s that love-hate relationship of Open Source and money. If you accept money, it becomes a job, but you’re not writing code most of the time. You’re writing the talks, reviewing pull requests, you’re looking at issues, you’re answering questions on StackOverflow, you’re discussing on Discord, you’re marketing on YouTube or conferences. When you become popular with Open Source, then it feels like you have a choice between two options: one is depression and the other one is burnout. If your project does not become successful, then suddenly you think you’re a failure, you’re a mistake. It has zero stars; nobody likes it. But if it becomes a success, then everyone likes it, and you get hugged to death. 
That’s a really unfortunate situation to be in, and you want to stop being overwhelmed with those responsibilities. You have to set clear boundaries and pick your poison. You have to be careful if you accept companies as sponsors. I want to show you one example of how it can work and [point out] some risks. Earlier this year, I started working on a real project that I had been putting off for many years before. You see, in December 2015, I started a list of static analysis tools on Github. Static analysis tools are just tools that help you improve your code, and it turns out that there’s a lot of those tools. Just starting to collect them was the first step. I didn’t think much about it, but over time that became really popular. And you can see that this graph is more or less a linear increase in stars over time. In 2018, I started really thinking hard about whether there was more than just a Github project here. I talked to many people that I had this idea of building something more from that. It really took someone else to maybe push me over the finishing line and convinced me that this was worth it, and that is Jakub. He said, “why not build a website from it?” and over the course of maybe two weekends or so, we built a website. It’s built with Gatsby, but it really doesn’t matter. We just did it, and then we saw what happened to it. We render 500 tools right now, and the initial feedback was really great. People really seem to like that. We got a cool 720.000 requests on the first day, and over the next week or so, it more or less hit 1.5 million. That was great because suddenly people started getting interested in that project. So we started finding some sponsors. Those companies are special because they believe in your mission, but they also know how Open Source works. They don’t really expect you to advertise their tool. They want to sell to developers, so they want to be in the developers’ minds, saying: “Hey! You are a developer. 
We built this amazing tool; you might want to check it out!” but they also get seen as an Open Source company. I think that’s a win-win. I have to say it doesn’t always go as easily. Sometimes companies expect you to just have cheap advertising space. Then they jump off the moment they see you don’t get that many clicks, but others understand that they invest into something that maybe pays off in a year or two from now. So I’m really thankful that some companies understand that mission. However, what companies want is different than what individuals want. Companies want an invoice. Companies want something tax-deductible. Companies want someone that keeps the lights on and is responsive via email, so you really have those obligations, and one platform that helps with that is Open Collective. They have a 501(c)(6) program for Open Source projects that acts as a fiscal host, which means they will do all the invoicing and officially be the maintainers. If you, as an Open Source maintainer or a contributor to a project, want to get [reimbursed for your work], you have to send an invoice to Open Collective. I think that’s the best of both worlds. Again, because it’s a very transparent process, companies are in the loop and don’t have to deal with all the financial stuff. But it also means that you have to really polish your public perception. Companies really want to know what they can get out of sponsoring you, and you have to make that very clear. Probably the most important site that you have is not your website, but it’s your sponsors page on Github where you describe the different tiers and what those tiers mean, so we have three tiers: One is targeted at smaller companies and freelancers. They just get exposure, and they get seen as an Open Source friendly tech company. That is a hundred dollars a month. We have a middle-tier, a company sponsor that maybe is a bigger company. 
They get the badge, too, but they also get a blog post about a static analysis tool that they want to promote, but we make it transparent that this is really sponsored content. Finally, if you want to go all the way, you go to full content creation, which might be a video workshop, but we don’t have video workshop sponsors yet, so I cannot talk about that yet. I have to say I really would like to try though and it’s cheap really for what you get. Anyway, those are things that you can do today. Without really changing how you work on Open Source, you can set that up, and you just see how it goes. Maybe no one reacts, and that’s fine. Everything else on that list is kind of advanced. You need an audience, and so you should start with that. Paid learning material is something that we are doing with analysis tools in the future with a video course. There are companies like Tailwind that do that impressively well, so you can learn from them. For merchandising, you have to have a brand. Hence, it’s not something that I could do, but someone like Mozilla or the Coding Train on YouTube could definitely do something like that. Consulting is always an option. Still, it’s also a lot more work and probably takes you away from what you really love, so it really becomes a job. You have to think about whether you want to do that or not. Enterprise services are very advanced [and interesting] for maybe the one percent of projects that can be run in a business and where you have special requirements. I have to say start from the top and work your way down. Start to create an audience. It’s probably easier to build an audience on Twitter and then funnel it back to Github than the other way around. Oh, by the way, did I tell you it’s hard? I really don’t want to end on a low note. I really want to emphasize that I would do it again, all of that if I started today. I think there’s no better time to contribute to Open Source than today. 
Probably tomorrow will even be a better time because suddenly, way more people are interested, it’s way easier to set up projects, you have all those free tools like VSCode and Github actions, free hosting. It’s just amazing how much you can pull off with very little money involved. So you can try it. What’s the worst thing that can happen? No one cares? Well, okay, then you’re as good as me. But I have some tips for you if you want to start today. My first tip is: “do your homework.” Many people start with learning, and then they build things, and then they close the circle, but there’s one key piece missing here. Some people hate the word, but you learn to love it eventually. It’s called marketing. Marketing means a lot of things to a lot of people, but what it means to me is getting the word out because someone else will if you don’t, and you are awesome; you just have to realize that. Maybe not everyone knows [about your project] right away, so you should really talk about it more. Maybe at conferences, maybe on Twitter, maybe you can just tell your friends. Maybe you can ask people to contribute and to support you. Somehow it’s frowned upon in the community that if you do marketing, you’re not doing it for real, but I think that’s not true. I think that if smart people and patient and passionate people did marketing, then the world would be a better place; because I’m pretty sure the evil guys do marketing. So do your homework, but rest assured that being an Open Source maintainer means running a business, and you are the product. You have to think about why someone would want to sponsor you because if you don’t come up with an answer for that, how should they know. Also, think about the funnel. How will people find you, for example? The best way for people to find you is probably starting a YouTube channel. There are easier ways, though. [First,] you can always help out in a different project, and you don’t even have to be a coder. 
If you are good with design, then I can tell you there are so many Open Source projects that need designers. It’s crazy. Maybe start creating a logo for a small project and start getting some visibility. Another one is having fun. If you know that earning money is hard in Open Source, then that can also be liberating because it means you can experiment and you can be creative, and yeah, having fun is the most important thing, I guess. Second, build things you love because it’s your free time in the end. The chances that someone will find the project is pretty low, so it better be something that you’re really interested in. If you don’t believe in that, just move on to the next thing. It’s fine if you drop a project that you don’t believe in anymore. No one will hold you accountable for that unless they are jerks, and you don’t want to be surrounded by jerks. Third, find friendly people because you really grow with your community. You want people that support your project and maybe eventually become maintainers to ease the burden, and that takes a lot of time, sometimes years, until you find one maintainer, so always be friendly, try to put yourself in their perspective. Go the extra mile if you can. For example, reintegrate the master branch into their pull request. Just do it for them. Say thanks twice if you’re unsure. Fourth is to grow an audience. Radical marketing is one way, but being approachable and being inclusive is another way. You want to be the guy or the girl that people go to when they have a tough question, or they want to know how to get into Open Source. You want to be the person that helps them out on their first pull request. They will pay it back a thousand times. The most exciting people I have met so far are available for questions, and they don’t really ask for anything in return. You hold them very close and dear to your heart. When the time comes, you will remember those people. 
We will say, like, “this is an amazing person to work with; I can highly recommend them,” which is called a lead. Finally, be in it for the long run. Good things take time. You see, it took me 10 years. Maybe it takes you five or maybe even less, but it’s probably not an overnight success. It’s really a long-term investment. " + }, + { + "title": "My Blog Just Got Faster: Cloudflare Workers and AVIF Support", + "url": "https://endler.dev/2020/perf/", + "body": " Did I mention that this website is fast? Oh yeah, I did, multiple times. Few reasons (from ordinary to the first signs of creeping insanity): 📄 Static site ☁️ Cached on Cloudflare CDN 🔗 ️HTTP&#x2F;2 and HTTP&#x2F;3 support 🚫 No web fonts (sadly) ✅ Edge-worker powered analytics (no Google Analytics) 🌸 Avoiding JavaScript whenever possible; CSS covers 90% of my use-cases. 🖼️ Image width and height specified in HTML to avoid page reflows. 👍🏻 Inlined, optimized SVG graphics and hand-rolled CSS 🚅 Static WASM search (lazy loaded) 🏎️ The entire homepage is &amp;lt;10K (brotli-compressed), including graphics, thus should fit into the first HTTP round-trip. 💟 Heck, even the favicon is optimized for size. Update: I’m using an SVG icon now thanks to this article. Then again, it’s 2020: everyone is optimizing their favicons, right? …right!? Well, it turns out most other sites don’t think about their user’s data plans as much as I do. Actually, that’s an understatement: they don’t care at all. But to me, lean is beautiful! Wait, What About Images? I prefer SVG for diagrams and illustrations. Only if it’s a photo, I’ll use JPEG or WebP. To be honest with you, I never really liked WebP. The gist is that it might not even be smaller than JPEGs compressed with MozJPEG. There is a lengthy debate on the Mozilla bug tracker if you want to read more. To this day, Safari doesn’t support WebP. Hello AVIF 👋 Meet AVIF, the new next-gen image compression format. 
Check this out: Source: ReachLightSpeed.com It’s already supported by Chrome 85 and Firefox 80. Then it hit me like a hurricane 🌪️: 😲 Holy smokes, AVIF is supported by major browsers now!? I want this for my blog! Yes and no. I’m using Zola for my blog, and AVIF support for Zola is not yet there, but I want it now! So I whipped up an ugly Rust script (as you do) that creates AVIF images from my old JPEG and PNG images. I keep the original raw files around just in case. Under the hood, it calls cavif by Kornel Lesiński. Data Savings The results of AVIF on the blog were nothing short of impressive: Total image size for endler.dev&#x2F;2020&#x2F;sponsors Check Your Browser But hold on for a sec… is your browser even capable of showing AVIF? If that reads “yup,” you’re all set. If that reads “nope,” then you have a few options: On Firefox: Open about:config from the address bar and search for avif. On Chrome: Make sure to update to the latest version. On Safari: I’m not sure what you’re doing with your life. Try a real browser instead. 😏 Workaround I: Fallback For Older Browsers HTML is great in that your browser ignores unknown new syntax. So I can use the &amp;lt;picture&amp;gt; element to serve the right format to you. (Look ma, no JavaScript!) &amp;lt;picture&amp;gt; &amp;lt;source srcset=&amp;quot;fancy_browser.avif&amp;quot; &#x2F;&amp;gt; &amp;lt;source srcset=&amp;quot;decent_browser.webp&amp;quot; &#x2F;&amp;gt; &amp;lt;img src=&amp;quot;meh_browser.jpg&amp;quot; &#x2F;&amp;gt; &amp;lt;&#x2F;picture&amp;gt; The real thing is a bit more convoluted, but you get the idea. Workaround II: Wrong Content-Type On Github Pages There was one ugly problem with Github and AVIF, though: Their server returned a Content-Type: application&#x2F;octet-stream header. This meant that the images did not load on Firefox. There is no way to fix that on my side as Github is hosting my page. Until now! 
I wanted to try Cloudflare’s Workers Sites for a long time, and this bug finally made me switch. Basically, I run the full website as an edge worker right on the CDN; no own web server is needed. What’s great about it is that the site is fast everywhere now — even in remote locations — no more roundtrips to a server. By running an edge worker, I also gained full control over the request- and response objects. I added this gem of a snippet to intercept the worker response: if (&#x2F;.avif$&#x2F;.test(url)) response.headers.set(&amp;quot;Content-Type&amp;quot;, &amp;quot;image&#x2F;avif&amp;quot;); response.headers.set(&amp;quot;Content-Disposition&amp;quot;, &amp;quot;inline&amp;quot;); And bam, Bob’s your uncle. Firefox is happy. You can read more about modifying response objects here. Another side-effect of Workers Sites is that a production deployment takes one minute now. Performance Results After Moving To Cloudflare Website response time before Source: KeyCDN Website response time after Source: KeyCDN Page size and rating before Source: Pingdom.com Page size and rating after Source: Pingdom.com I don’t have to hide from a comparison with well-known sites either: Comparison with some other blogs I read Source: Speedcurve Further reading How to Use AVIF: The New Next-Gen Image Compression Format — Nice introduction that highlights some common pitfalls when integrating AVIF. It inspired me to add AVIF support. AVIF has landed by Jake Archibald — Compares image sizes and qualities of different formats: SVG, JPEG, PNG, WebP, and AVIF. avif.io — Fast, configurable, client-side image compression that works on desktop and mobile. 
Squoosh — another image compression service built with WebAssembly that supports AVIF Tons of great examples on how to configure Cloudflare workers Cloudflare Workers Sites " + }, + { + "title": "Launching a Side Project Backed by Github Sponsors", + "url": "https://endler.dev/2020/sponsors/", + "body": "Yesterday we launched analysis-tools.dev, and boy had I underestimated the response. It’s a side project about comparing static code analysis tools. Static analysis helps improve code quality by detecting bugs in source code without even running it. What’s best about the project is that it’s completely open-source. We wanted to build a product that wouldn’t depend on showing ads or tracking users. Instead, we were asking for sponsors on Github — that’s it. We learned a lot in the process, and if you like to do the same, keep reading! First, Some Stats Everyone likes business metrics. Here are some of ours: The project started as an awesome list on Github in December 2015. We’re currently listing 470 static analysis tools. Traffic grew continuously. Counting 7.5k stars and over 190 contributors at the moment. 500-1000 unique users per week. I had the idea to build a website for years now, but my coworker Jakub joined in May 2020 to finally make it a reality. Github stars over time. That graph screams BUSINESS OPPORTUNITY. Source: star-history.t9t.io “Why did it take five years to build a website!?”, I hear you ask. Because I thought the idea was so obvious that others must have tried before and failed. I put it off, even though nobody stepped in to fill this niche. I put it off, even though I kept the list up-to-date for five years, just to learn about the tools out there. You get the gist: don’t put things off for too long. When ideas sound obvious, it’s probably because they are. Revenue Model It took a while to figure out how to support the project financially. We knew what we didn’t want: an SEO landfill backed by AdWords. 
Neither did we want to “sell user data” to trackers. We owe it to the contributors on Github to keep all data free for everyone. How could we still build a service around it? Initially, we thought about swallowing the infrastructure costs ourselves, but we’d have no incentive to maintain the site or extend it with new features. Github Sponsors was still quite new at that time. Yet, as soon as we realized that it was an option, it suddenly clicked: Companies that are not afraid of a comparison with the competition have an incentive to support an open platform that facilitates that. Furthermore, we could avoid bias and build a product that makes comparing objective and accessible. Sponsoring could be the antidote to soulless growth and instead allow us to build a lean, sustainable side business. We don’t expect analysis-tools.dev ever to be a full-time job. The market might be too small for that — and that’s fine. Tech Once we had a revenue model, we could focus on the tech. We’re both engineers, which helps with iterating quickly. Initially, I wanted to build something fancy with Yew. It’s a Rust&#x2F;Webassembly framework and your boy likes Rust&#x2F;Webassembly… I’m glad Jakub suggested something else: Gatsby. Now, let me be honest with you: I couldn’t care less about Gatsby. And that’s what I said to Jakub: “I couldn’t care less about Gatsby.” But that’s precisely the point: not being emotionally attached to something makes us focus on the job and not the tool. We get more stuff done! From there on, it was pretty much easy going: we used a starter template, Jakub showed me how the GraphQL integration worked, and we even got to use some Rust! The site runs on Cloudflare as an edge worker built on top of Rust. (Yeah, I cheated a bit.) Count to three, MVP! Finding Sponsors So we had our prototype but zero sponsors so far. What started now was (and still is) by far the hardest part: convincing people to support us. 
We were smart enough not to send cold e-mails because most companies ignore them. Instead, we turned to our network and realized that developers reached out before to add their company’s projects to the old static analysis list on Github. These were the people we contacted first. We tried to keep the messages short and personal. What worked best was a medium-sized e-mail with some context and a reminder that they contributed to the project before. We included a link to our sponsors page. Businesses want reliable partners and a reasonable value proposal, so a prerequisite is that the sponsor page has to be meticulously polished. Our Github Sponsors page Just like Star Wars Episode IX, we received mixed reviews: many people never replied, others passed the message on to their managers, which in turn never replied, while others again had no interest in sponsoring open-source projects in general. That’s all fair game: people are busy, and sponsorware is quite a new concept. A little rant: I’m of the opinion that tech businesses don’t nearly sponsor enough compared to all the value they get from Open Source. Would your company exist if there hadn’t been a free operating system like Linux or a web server like Nginx or Apache when it was founded? There was, however, a rare breed of respondents, which expressed interest but needed some guidance. For many, it is the first step towards sponsoring any developer through Github Sponsors &#x2F; OpenCollective. It helped that we use OpenCollective as our fiscal host, which handles invoicing and donation transfers. Their docs helped us a lot when getting started. The task of finding sponsors is never done, but it was very reassuring to hear from DeepCode - an AI-based semantic analysis service, that they were willing to take a chance on us. Thanks to them, we could push product over the finishing line. Because of them, we can keep the site free for everybody. It also means the website is kept free from ads and trackers. 
In turn, DeepCode gets exposed to many great developers that care about code quality and might become loyal customers. Also, they get recognized as an open-source-friendly tech company, which is more important than ever if you’re trying to sell dev tools. Win-win! Marketing Jakub and I both had started businesses before, but this was the first truly open product we would build. Phase 1: Ship early 🚀 We decided on a soft launch: deploy the site as early as possible and let the crawlers index it. The fact that the page is statically rendered and follows some basic SEO guidelines sure helped with improving our search engine rankings over time. Phase 2: Ask for feedback from your target audience 💬 After we got some organic traffic and our first votes, we reached out to our developer friends to test the page and vote on tools they know and love. This served as an early validation, and we got some honest feedback, which helped us catch the most blatant flaws. Phase 3: Prepare announcement post 📝 We wrote a blog post which, even if clickbaity, got the job done: Static Analysis is Broken — Let’s Fix It! It pretty much captures our frustration about the space and why building an open platform is important. We could have done a better job explaining the technical differences between the different analysis tools, but that’s for another day. Phase 4: Announce on social media 🔥 Shortly before the official announcement, we noticed that the search functionality was broken (of course). Turns out, we hit the free quota limit on Algolia a biiit earlier than expected. 😅 No biggie: quick exchange with Algolia’s customer support, and they moved us over to the open-source plan (which we didn’t know existed). We were back on track! Side note: Algolia customer support is top-notch. Responsive, tech-savvy, and helpful. Using Algolia turned out to be a great fit for our product. Response times are consistently in the low milliseconds and the integration with Gatsby was quick and easy. 
We got quite a bit of buzz from that tweet: 63 retweets, 86 likes and counting Clearly, everyone knew that we were asking for support here, but we are thankful for every single one that liked and retweeted. It’s one of these situations where having a network of like-minded people can help. As soon as we were confident that the site wasn’t completely broken, we set off to announce it on Lobste.rs (2 downvotes), &#x2F;r&#x2F;SideProject (3 upvotes) and Hacker News (173 upvotes, 57 comments). Social media is kind of unpredictable. It helps to cater the message to each audience and stay humble, though. The response from all of that marketing effort was nuts: Traffic on launch day Perhaps unsurprisingly, the Cloudflare edge workers didn’t break a sweat. Edge worker CPU time on Cloudflare My boss Xoan Vilas even did a quick performance analysis and he approved. (Thanks boss!) High fives all around! Now what? Of course, we’ll add new features; of course, we have more plans for the future, yada yada yada. Instead, let’s reflect on that milestone: a healthy little business with no ads or trackers, solely carried by sponsors. 🎉 Finally, I want you to look deep inside yourself and find your own little product to work on. It’s probably right in front of your nose, and like myself, you’ve been putting it off for too long. Well, not anymore! The next success story is yours. So go out and build things. Oh wait! …before you leave, would you mind checking out analysis-tools.dev and smashing that upvote button for a few tools you like? Hey, and if you feel super generous today (or you have a fabulous employer that cares about open-source), why not check out our sponsorship page? Jakub and me in Vienna, Austria. I’m not actually that small. " + }, + { + "title": "What Happened To Programming In The 2010s?", + "url": "https://endler.dev/2020/review/", + "body": "A while ago, I read an article titled “What Happened In The 2010s” by Fred Wilson. 
The post highlights key changes in technology and business during the last ten years. This inspired me to think about a much more narrow topic: What Happened To Programming In The 2010s? 🚓 I probably forgot like 90% of what actually happened. Please don’t sue me. My goal is to reflect on the past so that you can better predict the future. Where To Start? From a mile-high perspective, programming is still the same as a decade ago: Punch program into editor Feed to compiler (or interpreter) Bleep Boop 🤖 Receive output But if we take a closer look, a lot has changed around us. Many things we take for granted today didn’t exist a decade ago. What Happened Before? Back in 2009, we wrote jQuery plugins, ran websites on shared hosting services, and uploaded content via FTP. Sometimes code was copy-pasted from dubious forums, tutorials on blogs, or even hand-transcribed from books. Stack Overflow (which launched on 15th of September 2008) was still in its infancy. Version control was done with CVS or SVN — or not at all. I signed up for Github on 3rd of January 2010. Nobody had even heard of a Raspberry Pi (which only got released in 2012). Source: xkcd #2324 An Explosion Of New Programming Languages The last decade saw the creation of a vast number of new and exciting programming languages. Crystal, Dart, Elixir, Elm, Go, Julia, Kotlin, Nim, Rust, Swift, TypeScript all released their first stable version! Even more exciting: all of the above languages are developed in the open now, and the source code is freely available on Github. That means, everyone can contribute to their development — a big testament to Open Source. Each of those languages introduced new ideas that were not widespread before: Strong Type Systems: Kotlin and Swift made optional null types mainstream, TypeScript brought types to JavaScript, Algebraic datatypes are common in Kotlin, Swift, TypeScript, and Rust. 
Interoperability: Dart compiles to JavaScript, Elixir interfaces with Erlang, Kotlin with Java, and Swift with Objective-C. Better Performance: Go promoted Goroutines and channels for easier concurrency and impressed with a sub-millisecond Garbage Collector, while Rust avoids Garbage Collector overhead altogether thanks to ownership and borrowing. This is just a short list, but innovation in the programming language field has greatly accelerated. More Innovation in Older Languages Established languages didn’t stand still either. A few examples: C++ woke up from its long winter sleep and released C++11 after its last major release in 1998. It introduced numerous new features like Lambdas, auto pointers, and range-based loops to the language. At the beginning of the last decade, the latest PHP version was 5.3. We’re at 7.4 now. (We skipped 6.0, but I’m not ready to talk about it yet.) Along the way, it got over twice as fast. PHP is a truly modern programming language now with a thriving ecosystem. Heck, even Visual Basic has tuples now. (Sorry, I couldn’t resist.) Faster Release Cycles Most languages adopted a quicker release cycle. Here’s a list for some popular languages: Language: current release cycle. C: irregular. C#: ~ 12 months. C++: ~ 3 years. Go: 6 months. Java: 6 months. JavaScript (ECMAScript): 12 months. PHP: 12 months. Python: 12 months. Ruby: 12 months. Rust: 6 weeks (!). Swift: 6 months. Visual Basic .NET: ~ 24 months. The Slow Death Of Null Close to the end of the last decade, in a talk from 25th of August 2009, Tony Hoare described the null pointer as his Billion Dollar Mistake. A study by the Chromium project found that 70% of their serious security bugs were memory safety problems (same for Microsoft). Fortunately, the notion that our memory safety problem isn’t bad coders has finally gained some traction. Many mainstream languages embraced safer alternatives to null: nullable types, Option, and Result types. 
Languages like Haskell had these features before, but they only gained popularity in the 2010s. Revenge of the Type System Closely related is the debate about type systems. The past decade has seen type systems make their stage comeback; TypeScript, Python, and PHP (just to name a few) started to embrace type systems. The trend goes towards type inference: add types to make your intent clearer for other humans and in the face of ambiguity — otherwise, skip them. Java, C++, Go, Kotlin, Swift, and Rust are popular examples with type inference support. I can only speak for myself, but I think writing Java has become a lot more ergonomic in the last few years. Exponential Growth Of Libraries and Frameworks As of today, npm hosts 1,330,634 packages. That’s over a million packages that somebody else is maintaining for you. Add another 160,488 Ruby gems, 243,984 Python projects, and top it off with 42,547 Rust crates. Number of packages for popular programming languages. Don’t ask me what happened to npm in 2019. Source: Module Counts Of course, there’s the occasional leftpad, but it also means that we have to write less library code ourselves and can focus on business value instead. On the other hand, there are more potential points of failure, and auditing is difficult. There is also a large number of outdated packages. For a more in-depth discussion, I recommend the Census II report by the Linux Foundation &amp;amp; Harvard [PDF]. We also went a bit crazy on frontend frameworks: Angular in 2010 React in 2013 Vue in 2014 Svelte in 2016 …and soon Yew? No Free Lunch A review like this wouldn’t be complete without taking a peek at Moore’s Law. It has held up surprisingly well in the last decade: Source: Wikipedia There’s a catch, though. 
Looking at single-core performance, the curve is flattening: Source: Stanford University: The Future of Computing (video) The new transistors prophesied by Moore don’t make our CPUs faster but instead add other kinds of processing capabilities like more parallelism or hardware encryption. There is no free lunch anymore. Engineers have to find new ways of making their applications faster, e.g. by embracing concurrent execution. Callbacks, coroutines, and eventually async&#x2F;await are becoming industry standards. GPUs (Graphical Processing Units) became very powerful, allowing for massively parallel computations, which caused a renaissance of Machine Learning for practical use-cases: Deep learning becomes feasible, which leads to machine learning becoming integral to many widely used software services and applications. — Timeline of Machine Learning on Wikipedia Compute is ubiquitous, so in most cases, energy efficiency plays a more prominent role now than raw performance (at least for consumer devices). Unlikely Twists Of Fate Microsoft is a cool kid now. It acquired Github, announced the Windows subsystem for Linux (which should really be called Linux Subsystem for Windows), open sourced MS-DOS and .NET. Even the Microsoft Calculator is now open source. IBM acquired Red Hat. Linus Torvalds apologized for his behavior, took time off. Open source became the default for software development (?). Learnings If you’re now thinking: Matthias, you totally forgot X, then I brought that point home. This is not even close to everything that happened. You’d roughly need a decade to talk about all of it. Personally, I’m excited about the next ten years. Software is eating the world — at an ever-faster pace. " + }, + { + "title": "Tips for Faster Rust Compile Times", + "url": "https://endler.dev/2020/rust-compile-times/", + "body": " This post has moved to my other blog. It will be updated there in the future. 
" + }, + { + "title": "Gravity", + "url": "https://endler.dev/2020/gravity/", + "body": "Here’s a test to show your age: Do you still remember that funny JavaScript gravity effect, which Google used on their homepage ten years ago? This one? document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); I wanted to have some fun and integrated it into a website I was building. Unfortunately, it didn’t work out-of-the-box. It choked on some DOM elements that were not strictly classes (like SVG elements). So, in good hacker fashion, I quickly patched up the script (it’s just a three-line change), and now it’s back to its former glory. Test it here! (Caution: you’ll have to reload the page after that. 😏) Apply Gravity var myLink = document.getElementById(gravity); myLink.onclick = function() var script = document.createElement( script ); script.type = text&#x2F;javascript ; script.src = gravity.js ; document.getElementsByTagName( head )[0].appendChild(script); return false; Anyway, feel free to add it to your own sites and have some fun. It’s also great to prank your friends. Simply add that single line to any website and weeee! &amp;lt;script type=&amp;quot;text&#x2F;javascript&amp;quot; src=&amp;quot;https:&#x2F;&#x2F;endler.dev&#x2F;2020&#x2F;gravity&#x2F;gravity.js&amp;quot; &amp;gt;&amp;lt;&#x2F;script&amp;gt; Sometimes I miss those simple times of the early web… " + }, + { + "title": "Hacker Folklore", + "url": "https://endler.dev/2020/folklore/", + "body": "Some computer terms have a surprising legacy. Many of them are derived from long-obsolete technologies. This post tries to dust off the exciting history of some of these terms that we use every day but aren’t quite sure about their origins. Let’s jump right in! Bike-Shedding Today’s meaning: A pointless discussion about trivial issues. 
The term bike-shed effect or bike-shedding was coined as a metaphor to illuminate the law of triviality; it was popularised in the Berkeley Software Distribution community by the Danish computer developer Poul-Henning Kamp in 1999 on the FreeBSD mailing list and has spread from there to the whole software industry. The concept was first presented as a corollary of his broader “Parkinson’s law” spoof of management. He dramatizes this “law of triviality” with the example of a committee’s deliberations on an atomic reactor, contrasting it to deliberations on a bicycle shed. As he put it: “The time spent on any item of the agenda will be in inverse proportion to the sum of money involved.” A reactor is so vastly expensive and complicated that an average person cannot understand it, so one assumes that those who work on it understand it. On the other hand, everyone can visualize a cheap, simple bicycle shed, so planning one can result in endless discussions because everyone involved wants to add a touch and show personal contribution. Reference - Wikipedia: Law of Triviality Boilerplate An old machine that bent steel plates into water boilers. Source: Wikimedia Commons Today’s meaning: A chunk of code that is copied over and over again with little or no changes made to it in the process. Boiler plate originally referred to the rolled steel used to make water boilers but is used in the media to refer to hackneyed or unoriginal writing. The term refers to the metal printing plates of pre-prepared text such as advertisements or syndicated columns that were distributed to small, local newspapers. These printing plates came to be known as ‘boilerplates’ by analogy. One large supplier to newspapers of this kind of boilerplate was the Western Newspaper Union, which supplied “ready-to-print stories [which] contained national or international news” to papers with smaller geographic footprints, which could include advertisements pre-printed next to the conventional content. 
References: Wikipedia Stack Overflow StackExchange - English Language &amp;amp; Usage The man in the foreground is holding a rounded printing plate. Plates like this were provided by companies such as Western Newspaper Union to many smaller newspapers. Source: Wikimedia Commons Boot &#x2F; Reboot &#x2F; Bootstrapping Lithography of Baron Münchhausen pulling himself out of a swamp by his pigtail Source: Wikimedia The term boot is used in the context of computers to refer to the process of starting a computer. In compiler development, the term bootstrapping refers to the process of rewriting a compiler in a new language: The first compiler is written in an existing language. Then it gets rewritten in the new language and compiled by itself. The saying “to pull oneself up by one’s bootstraps” dates back to the 19th century. Tall boots may have a tab, loop or handle at the top allowing one to help pulling them on. The metaphor spawned additional metaphors for self-sustaining processes that proceed without external help. According to Wikipedia, The idiom dates at least to 1834, when it appeared in the Workingman’s Advocate: “It is conjectured that Mr. Murphee will now be enabled to hand himself over the Cumberland river or a barn yard fence by the straps of his boots.” There’s also a nice summary in Merriam-Webster. Bug Today’s meaning: A defect in a piece of code or hardware. The origins are unknown! Contrary to popular belief it predates the bug found by Grace Hopper in the Mark II computer. The term was used by engineers way before that; at least since the 1870s. It predates electronic computers and computer software. Thomas Edison used the term “bug” in his notes. Reference Bit The term’s invention is credited to John W. Tukey, who in a memo written for Bell Labs on January 9, 1947, had shortened “binary information digit” to “bit”. Reference Byte The term “byte” was first introduced by Werner Buchholz in June 1956. 
This was during the initial design stage for the IBM Stretch computer. The computer had a design that enabled addressing down to the individual bit and allowed variable field length instructions, with the size of the byte encoded into the instruction itself. The choice of spelling as “byte” instead of “bite” was intentional to prevent any accidental alteration to “bit”. Carriage Return and Line Feed Today’s meaning: Set the cursor to the beginning of the next line. These two terms were adopted from typewriters. The carriage holds the paper and is moving from left to right to advance the typing position as the keys are pressed. It “carries” the paper with it. The carriage return is the operation when the carriage gets moved into its original position on the very left end side of the paper. Simply returning the carriage to the left is not enough to start with a new line, however. The carriage would still be on the same line as before — just at the beginning of the line. To go to a new line, a line feed was needed. It would move the paper inside the typewriter up by one line. These two operations — carriage return (CR) and line feed (LF) — were commonly done at once by pushing the carriage return lever. A mechanical typewriter. The lever for the carriage return is on the outer left side. Source: piqsels On Unix systems (like Linux or macOS), a \\n still stands for a line feed (ASCII symbol: LF) or newline. On CP&#x2F;M, DOS, and Windows, \\r\\n is used, where \\r stands for carriage return and \\n stands for line feed (CR+LF). Reference Here is an old video that shows the basic mechanics of carriage return and line-feed: document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Command key symbol (⌘) Today’s meaning: A meta-key available on Apple computers to provide additional keyboard combinations. Directly quoting Wikipedia (emphasis mine): The ⌘ symbol came into the Macintosh project at a late stage. 
The development team originally went for their old Apple key, but Steve Jobs found it frustrating when “apples” filled up the Mac’s menus next to the key commands, because he felt that this was an over-use of the company logo. He then opted for a different key symbol. With only a few days left before deadline, the team’s bitmap artist Susan Kare started researching for the Apple logo’s successor. She was browsing through a symbol dictionary when she came across the cloverleaf-like symbol, commonly used in Nordic countries as an indicator of cultural locations and places of interest (it is the official road sign for tourist attraction in Denmark, Finland, Iceland, Norway, and Sweden and the computer key has often been called Fornminne — ancient monument — by Swedish Mac users and Seværdighedstegn by Danish users). When she showed it to the rest of the team, everyone liked it, and so it became the symbol of the 1984 Macintosh command key. Susan Kare states that it has since been told to her that the symbol had been picked for its Scandinavian usage due to its resembling the shape of a square castle with round corner towers as seen from above looking down, notably Borgholm Castle. Norwegian Severdighet road sign Source: Wikimedia Commons Aerial view of Borgholm Castle, which could have been the model for the symbol Source: Wikimedia Commons References: Wikipedia: Command Key Cult of Mac: What Are The Mac’s Command ⌘ And Option ⌥ Symbols Supposed To Represent? Cookie Today’s meaning: A small piece of data sent from a website and stored in the user’s web browser. The term cookie was coined by 23-year-old web browser programmer Lou Montulli in the fall of 1994. It was inspired by the term magic cookie, which is a packet of data a program receives and sends back unchanged, used by Unix programmers. This term in turn derives from the fortune cookie, which is a cookie with an embedded message. 
Montulli used the term cookie to describe the small packets of data that the web browser receives and sends back unchanged to the web server. “So, yeah, the cookie,” Montulli says with a laugh. “It’s one week of my life that turned into the most important thing that I ever did.” (Reference) Core Dump Today’s meaning: Retrieving a snapshot of a (crashed) program’s state by storing all of its memory for offline analysis. The name comes from magnetic core memory, which is an early storage mechanism based on a grid of toroid magnets. It has since become obsolete, but the term is still used today for getting a snapshot of a computer process. Reference A 32 x 32 core memory plane storing 1024 bits (or 128 bytes) of data. The first core dumps were printed on paper, which sounds reasonable given these small amounts of bytes. Source: Wikimedia Commons Cursor Today’s meaning: a visual cue (such as a flashing vertical line) on a video display that indicates position (as for data entry). Merriam-Webster Cursor is Latin for runner. A cursor is the name given to the transparent slide engraved with a hairline that is used for marking a point on a slide rule. The term was then transferred to computers through analogy. Reference Source: A December 1951 advertisement for the IBM 604 Electronic Calculating Punch that was first produced in 1948. The advertisement claims the IBM 604 can do the work of 150 engineers with slide rules. The cursor (or runner) is the transparent part in the middle of the slide. Daemon In computing, a daemon is a background process that handles requests for services such as print spooling and file transfers, and then terminates. The term was coined by the programmers of MIT’s Project MAC (Mathematics and Computation) in 1963. They took the name from Maxwell’s demon, a hypothetical creature from a thought experiment that constantly works in the background, sorting molecules. 
The MIT programmers thought demon would be an appropriate name for a background process that worked tirelessly to perform system chores. But instead of using the term demon, they used daemon, which is an older form of the word. (Reference) Dashboard Today’s meaning: A user interface that provides a quick overview of a system’s status. Originally a plank of wood at the front of a horse-drawn carriage to protect the driver from mud ‘dashed’ backward by a horse’s hooves. When automobiles were manufactured, the board in front of the driver was given the same name. That was the logical place to put the necessary gauges so the driver could see them easily. In time, the term became more associated with the readouts than the protection it offered. Reference A dashboard of a horse carriage. Source: Wikimedia Commons Firewall Today’s meaning: A network security system that establishes a barrier between a trusted internal network and an untrusted external network, such as the Internet. Fire walls are used mainly in terraced houses, but also in individual residential buildings. They prevent fire and smoke from spreading to another part of the building in the event of a fire. Large fires can thus be prevented. The term has been used in computing since the 80s. Reference Firewall residential construction, separating the building into two separate residential units, and fire areas. Source: Wikimedia Commons Firmware Today’s meaning: A class of computer software that provides the low-level control for the device’s specific hardware and closely tied to the hardware it runs on. Ascher Opler coined the term firmware in a 1967 Datamation article. As originally used, firmware contrasted with hardware (the CPU itself) and software (normal instructions executing on a CPU). It existed on the boundary between hardware and software; thus the name “firmware”. The original article is available on the Internet Archive. Reference Foo and Bar Today’s meaning: Common placeholder variable names. 
Originally the term might come from the military term FUBAR. There are a few variations, but a common meaning is FUBAR: “f***ed up beyond all recognition”. The use of foo in a programming context is generally credited to the Tech Model Railroad Club (TMRC) of MIT from circa 1960. In the complex model system, there were scram switches located at numerous places around the room that could be thrown if something undesirable was about to occur, such as a train going full-bore at an obstruction. The way I understood it was that they literally had emergency buttons labeled foo for lack of a better name. Maybe related to the original military meaning of FUBAR to indicate that something is going very very wrong. A scram switch (button), that could be pressed to prevent inadvertent operation. Maybe the TMRC had buttons labeled foo instead Source: Wikimedia Commons References: Wikipedia Stack Overflow. Freelancer Today’s meaning: A self-employed person who is not committed to a particular employer long-term. The term first appears in the novel Ivanhoe by Sir Walter Scott. (The novel also had a lasting influence on the Robin Hood legend.) Cover of a Classic Comics book Source: Wikimedia Commons In it, a Lord offers his paid army of ‘free lances’ to King Richard: I offered Richard the service of my Free Lances, and he refused them — I will lead them to Hull, seize on shipping, and embark for Flanders; thanks to the bustling times, a man of action will always find employment. Therefore, a “free lancer” is someone who fights for whoever pays the most. Free does not mean “without pay”, but refers to the additional freedom to work for any employer. Reference Hash Today’s meaning: A hash function is any function that can be used to map data of arbitrary size to fixed-size values. According to Wikipedia, the use of the word “hash” in hash function “comes by way of analogy with its non-technical meaning, to “chop and mix”. 
Indeed, typical hash functions, like the mod operation, “chop” the input domain into many sub-domains that get “mixed” into the output range to improve the uniformity of the key distribution.“ References: Software Engineering StackExchange Wikipedia Log &#x2F; Logfile Today’s meaning: A file that records events of a computer program or system. Sailors used so-called log lines to measure the speed of their ship. A flat piece of wood (the log) was attached to a long rope. The log had regularly spaced knots in it. As the log would drift away, the sailors would count the number of knots that went out in a fixed time interval, and this would be the ship’s speed — in knots. The ship’s speed was important for navigation, so the sailors noted it down in a book, aptly called the log book, together with other information to establish the position of the ship more accurately, like landmark sightings and weather events. Later, additional information, more generally concerning the ship, was added — or logged — such as harbor fees and abnormal provision depletion. Reference. Sailors measuring ship speed with a log line Source: The Pilgrims &amp;amp; Plymouth Colony:1620 by Duane A. Cline The parts of a log-line Source: The Pilgrims &amp;amp; Plymouth Colony:1620 by Duane A. Cline Page from the log-file of the British Winchelsea. The second column denotes the number of knots measured with the log-line, which indicates the ship’s speed Source: Navigation and Logbooks in the Age of Sail by Peter Reaveley Patch Today’s meaning: A piece of code that can be applied to fix or improve a computer program. In the early days of computing history, if you made a programming mistake, you’d have to fix a paper tape or a punched card by putting a patch on top of a hole. A program tape with physical patches used to correct punched holes by covering them. Source: Smithsonian Archives Center Ping Today’s meaning: A way to check the availability and response time of a computer over the network. 
Ping is a terminal program originally written by Mike Muuss in 1983 that is included in every version of UNIX, Windows, and macOS. He named it “after the sound that a sonar makes, inspired by the whole principle of echo-location. […] ping uses timed IP&#x2F;ICMP ECHO_REQUEST and ECHO_REPLY packets to probe the “distance” to the target machine.“ The reference is well worth a read. Pixel Today’s meaning: The smallest controllable element of a picture represented on the screen. The word pixel is a combination of pix (from “pictures”, shortened to “pics”) and el (for “element”). Similarly, voxel is a volume element and texel is a texture element. Reference Shell Today’s meaning: An interactive, commonly text-based runtime to interact with a computer system. The inventor of the term, Louis Pouzin, does not give an explanation for the name in his essay The Origins of the Shell. It can however be traced back to Unix’ predecessor Multics. It is described in the Multics glossary like so: [The shell] is passed a command line for execution by the listener. The New Hacker’s Dictionary (also known as the Jargon File) by Eric S. Raymond contains the following: Historical note: Apparently, the original Multics shell (sense 1) was so called because it was a shell (sense 3); where sense 3 refers to A skeleton program, created by hand or by another program (like, say, a parser generator), which provides the necessary incantations to set up some task and the control flow to drive it (the term driver is sometimes used synonymously). The user is meant to fill in whatever code is needed to get real work done. This usage is common in the AI and Microsoft Windows worlds, and confuses Unix hackers. Unfortunately, the book does not provide any evidence to back up this claim. I like the (possibly historically incorrect) analogy to a nut with the shell being on the outside, protecting the kernel. 
Reference Slab allocator Today’s meaning: An efficient memory allocation technique, which reuses previous allocations. Slab allocation was invented by Jeff Bonwick (Note: PDF file) in 1994 and has since been used by services like Memcached and the Linux Kernel. With slab allocation, a cache for a certain type or size of data object has a number of pre-allocated “slabs” of memory; within each slab there are memory chunks of fixed size suitable for the objects. (Wikipedia) The name slab comes from a teenage friend of Bonwick. He tells the story on the Oracle blog: While watching TV together, a commercial by Kellogg’s came on with the tag line, “Can you pinch an inch?” The implication was that you were overweight if you could pinch more than an inch of fat on your waist — and that hoovering a bowl of corn flakes would help. Without missing a beat, Tommy, who weighed about 250 pounds, reached for his midsection and offered his response: “Hell, I can grab a slab!” A decade later, Bonwick remembered that term when he was looking for a word to describe the allocation of a larger chunk of memory. Here is the original Kellogg’s advertisement: document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Spam Today’s meaning: Unsolicited electronic communications, for example by sending mass-emails or posting in forums and chats. The term goes back to a sketch by the British comedy group Monty Python from 1970. In the sketch, a cafe is including Spam (a brand of canned cooked pork) in almost every dish. Spam is a portmanteau of spiced and ham. The excessive amount of Spam mentioned is a reference to the ubiquity of it and other imported canned meat products in the UK after World War II (a period of rationing in the UK) as the country struggled to rebuild its agricultural base. 
Reference Vintage Ad: Look What You Can Do With One Can of Spam Source: By user Jamie (jbcurio) on flickr.com document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Monty Pythons Flying Circus (1974) - SPAM from Testing Tester on Vimeo. Mainframe Today’s meaning: A large computer system, often used by large organizations. Originally, the term referred to the frame that held the main components of a computer. The main components were the CPU, memory, and I&#x2F;O devices. The term was used in the 1960s and 1970s when computers were large and required a lot of space. This diagram shows how the IBM 701 mainframe swings open for access to the circuitry. From ‘Type 701 EDPM [Electronic Data Processing Machine] Installation Manual’, IBM. From Computer History Museum archives. Read more about that on Ken Shirriff’s blog. Radio Button Today’s meaning: A UI element that allows the user to choose from a predefined set of mutually exclusive options “Radio buttons” are named after the analogous pendant of mechanical buttons that were used in radios. The UI concept has later been used in tape recorders, cassette recorders and wearable audio players (the famous “Walkman” and similar). And later in VCRs and video cameras. Reference An old car radio (left) and CSS radio buttons (right). Only a single option can be selected at any point in time. As a kid, I would push two buttons at once so they would interlock. Good times. Source: Images by Matt Coady Uppercase and lowercase Today’s meaning: Distinction between capital letters and small letters on a keyboard. Back when typesetting was a manual process where single letters made of lead were “type set” to form words and sentences, upper- and lowercase letters were kept in separate containers — or cases — to make this rather tedious process a little faster. A set of printers cases Source: From the book ‘Printing types, their history, forms, and use; a study in survivals’ by Updike, Daniel Berkeley, 1860-1941. 
Freely available on archive.org. Honorable mentions 404 Today’s meaning: HTTP Status Code for “File not found”. There is a story that the number comes from the server room where the World Wide Web’s central database was located. In there, administrators would manually locate the requested files and transfer them, over the network, to the person who made that request. If a file didn’t exist, they’d return an error message: “Room 404: file not found”. This, however, seems to be a myth and the status code was chosen rather arbitrarily based on the then well-established FTP status codes. Reference Programming languages and Abbreviations The etymology of programming language names and common abbreviations would probably warrant its own article, but I’ve decided to note down some of my favorites for the time being. C++ C++ is a programming language based on C by Bjarne Stroustrup. The name is a programmer pun by Rick Mascitti, a coworker of Stroustrup. The ++ refers to the post-increment operator, that is common in many C-like languages. It increases the value of a variable by 1. In that sense, C++ can be seen as the spiritual “successor” of C. Reference C Sharp Similarly to C++, C# is a C-like programming language. The name again refers to “incremental” improvements on top of C++. The # in the name looks like four plus signs. Hence C# == (C++)++. But on top of that, the name was also inspired by the musical notation where a sharp indicates that the written note should be made a semitone higher in pitch. Reference A C-Sharp note. Source: Wikimedia Commons PNG Officially, PNG stands for Portable Network Graphics. It was born out of frustration over a CompuServe announcement in 1994 that programs supporting GIF would have to pay licensing fees from now on. A working group led by hacker Thomas Boutell created the .png file format, a patent-free replacement for GIF. Therefore I prefer the format’s unofficial name: PNG’s Not GIF. Here’s a great article on PNG’s history. 
Reference Credits Most of the content comes from sources like Wikipedia (with reference where appropriate), but the explanations are difficult to hunt down if you don’t know what you’re looking for. This is a living document, and I’m planning to update it in case of reader submissions. Conclusion You have to know the past to understand the present. — Dr. Carl Sagan (1980) I hope you enjoyed this trip down memory lane. Now it’s your turn! 👉 Do you know any other stories? Send me a message, and I’ll add them here. Related Projects Awesome Computer History: A curated list of computer history videos, documentaries and related folklore maintained by Thomas Watson. Wikipedia: List of computer term etymologies: List of the origins of computer-related terms or terms used in the computing world. Talk: The Etymology of Programming by Brittany Storoz - JSConf EU 2018: A talk that explains the background behind a few programming terms. Careful here: the explanation for “bug” is probably wrong as mentioned above. Typewriter terminology that has survived into the personal computer era: A list of computer terms that have their origins from typewriters. Folklore - The Original Macintosh: Anecdotes about the development of Apple’s original Macintosh, and the people who made it. " + }, + { + "title": "A Timelapse of Timelapse", + "url": "https://endler.dev/2020/timelapse/", + "body": "Timelapse is a little open-source screen recorder for macOS. It takes a screenshot every second and creates a movie in the end. To celebrate its unlikely 1.0 release today, I present here a “timelapse” of this project’s journey. It just took ten years to get here. document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); 2011 - How it all began To be honest, I don’t remember why I initially wrote the tool. I must have had a personal need for a screen recorder, I guess… In May 2011, when I started the project, I was doing my Masters Degree in Computer Science. 
I might have needed the tool for University; most likely, however, I was just trying to find an excuse for not working on an assignment. During that time, I wrote a lot of tools like that. Mainly to scratch a personal itch, learn a new programming language, or just have fun. Among them are tools like a random sandwich generator for Subway (the American fast-food chain), DrawRoom, a keyboard-driven drawing app inspired by WriteRoom, and the obligatory CMS software, that I sold to clients. Surprisingly, none of them were a great success. DrawRoom, a tool that I wrote around the same time, is a real piece of art. To this day it has five commits and a single Github star (by myself, don’t judge…). What I do know for sure is that I was unhappy with all existing screen recorders. They could roughly be categorized into these three groups: Proprietary solutions that cost money or could call home. Tools that didn’t work on macOS. Small, fragile, one-off scripts that people passed around in forums or as Github gists. They rarely worked as advertised. Among the remaining tools were none that provided any timelapse functionality; so I set out to write my own. This all sounds very epic, but in reality, I worked on it for a day. After five heroic commits on May 11, 2011, it sat there, idle, for seven years… 2018 A lot of time elapsed before anything exciting happened. In January ’18, seemingly out of nowhere, the first user filed a bug report. It was titled hung when creating the avi 😱. Turns out that a game developer from Canada, juul1a, was trying to use the tool to track her progress on an indie game — how cool is that? To help her out, I decided to do some general cleanup, finally write down some instructions on how to even use the program, add a requirements.txt, and port the tool from mencoder to ffmpeg. After that, timelapse was ready for prime-time. 
🎬 Here is some live action from her videos featuring timelapses: document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); At that point, the tool was still very wobbly and could only be used from the commandline, but I began to see some potential for building a proper app from it; I just never found the time. In October ’18, I decided to ask for support during Hacktoberfest. I created a few tickets and labeled them with hacktoberfest to try and find contributors. And then, I waited. First, Shreya V Prabhu fixed an issue where a new recording was overwriting the previous one by adding a timestamp to the video name. Then Abner Campanha and Shane Creedon (no longer on Github) created a basic test structure. Gbenro Selere added a CI pipeline for Travis CI. It really worked, and the project was in much better shape after that! 2019 One year passes by, and Kyle Jones adds some contribution guidelines, while I move the CI pipeline to the newly released Github actions. Chaitanya fixed a bug where the program would hang when the recording stopped by moving the video creation from threads to a separate process. He continued to make the codebase more robust and became a core contributor, reviewing pull requests and handling releases. Thanks to orcutt989, the app now made use of type hints in Python 3.6. gkpln3 added support for multi-monitor configurations. The screen captured will always be the one with the mouse on it. 2020 Fast forward to today, and after almost ten years, we finally created a true macOS app using the awesome py2app bundler. This should make the tool usable by non-developers. Back to the Future We reached the end of our little journey. A long time has passed until 1.0. This project is a testament to the wonders of open source collaboration, and I am proud to work on it with contributors from around the world. It doesn’t have to be a life-changing project to bring people together who have fun building things. 
If this were the end of the story, I’d be okay with that. I doubt it, though. Here’s to the next ten years! 🎬 Download timelapse on Github. Bonus The video at the beginning is a timelapse of how I finish this article. How meta. " + }, + { + "title": "A Tiny, Static, Full-Text Search Engine using Rust and WebAssembly", + "url": "https://endler.dev/2019/tinysearch/", + "body": " I wrote a basic search module that you can add to a static website. It’s very lightweight (50kB-100kB gzipped) and works with Hugo, Zola, and Jekyll. Only searching for entire words is supported. Try the search box on the left for a demo. The code is on Github. Static site generators are magical. They combine the best of both worlds: dynamic content without sacrificing performance. Over the years, this blog has been running on Jekyll, Cobalt, and, lately, Zola. One thing I always disliked, however, was the fact that static websites don’t come with “static” search engines, too. Instead, people resort to custom Google searches, external search engines like Algolia, or pure JavaScript-based solutions like lunr.js or elasticlunr. All of these work fine for most sites, but it never felt like the final answer. I didn’t want to add yet another dependency on Google; neither did I want to use a stand-alone web-backend like Algolia, which adds latency and is proprietary. On the other side, I’m not a huge fan of JavaScript-heavy websites. For example, just the search indices that lunr creates can be multiple megabytes in size. That feels lavish - even by today’s bandwidth standards. On top of that, parsing JavaScript is still time-consuming. I wanted some simple, lean, and self-contained search, that could be deployed next to my other static content. As a consequence, I refrained from adding search functionality to my blog at all. That’s unfortunate because, with a growing number of articles, it gets harder and harder to find relevant content. 
The Idea Many years ago, in 2013, I read “Writing a full-text search engine using Bloom filters” — and it was a revelation. The idea was simple: Let’s run all my blog articles through a generator that creates a tiny, self-contained search index using this magical data structure called a ✨Bloom Filter ✨. Wait, what’s a Bloom Filter? A Bloom filter is a space-efficient way to check if an element is in a set. The trick is that it doesn’t store the elements themselves; it just knows with some confidence that they were stored before. In our case, it can say with a certain error rate that a word is in an article. A Bloom filter stores a ‘fingerprint’ (a number of hash values) of all input values instead of the raw input. The result is a low-memory-footprint data structure. This is an example of ‘hello’ as an input. Here’s the Python code from the original article that generates the Bloom filters for each post (courtesy of Stavros Korokithakis): filters = for name, words in split_posts.items(): filters[name] = BloomFilter(capacity=len(words), error_rate=0.1) for word in words: filters[name].add(word) The memory footprint is extremely small, thanks to error_rate, which allows for a negligible number of false positives. I immediately knew that I wanted something like this for my homepage. My idea was to directly ship the Bloom filters and the search engine to the browser. I could finally have a small, static search without the need for a backend! Headaches Disillusionment came quickly. I had no idea how to bundle and minimize the generated Bloom filters, let alone run them on clients. The original article briefly touches on this: You need to implement a Bloom filter algorithm on the client-side. This will probably not be much longer than the inverted index search algorithm, but it’s still probably a bit more complicated. I didn’t feel confident enough in my JavaScript skills to pull this off. 
Back in 2013, NPM was a mere three years old, and WebPack just turned one, so I also didn’t know where to look for existing solutions. Unsure what to do next, my idea remained a pipe dream. A New Hope Five years later, in 2018, the web had become a different place. Bundlers were ubiquitous, and the Node ecosystem was flourishing. One thing, in particular, revived my dreams about the tiny static search engine: WebAssembly. WebAssembly (abbreviated Wasm) is a binary instruction format for a stack-based virtual machine. Wasm is designed as a portable target for compilation of high-level languages like C&#x2F;C++&#x2F;Rust, enabling deployment on the web for client and server applications. [source] This meant that I could use a language that I was familiar with to write the client-side code — Rust! 🎉 My journey started with a prototype back in January 2018. It was just a direct port of the Python version from above: let mut filters = HashMap::new(); for (name, words) in articles let mut filter = BloomFilter::with_rate(0.1, words.len() as u32); for word in words filter.insert(&amp;amp;word); filters.insert(name, filter); While I managed to create the Bloom filters for every article, I still had no clue how to package it for the web… until wasm-pack came along in February 2018. Whoops! I Shipped Some Rust Code To Your Browser. Now I had all the pieces of the puzzle: Rust — A language I was comfortable with wasm-pack — A bundler for WebAssembly modules A working prototype that served as a proof-of-concept The search box you see on the left side of this page is the outcome. It fully runs on Rust using WebAssembly (a.k.a the RAW stack). Try it now if you like. There were quite a few obstacles along the way. Bloom Filter Crates I looked into a few Rust libraries (crates) that implement Bloom filters. First, I tried jedisct1’s rust-bloom-filter, but the types didn’t implement Serialize&#x2F;Deserialize. 
This meant that I could not store my generated Bloom filters inside the binary and load them on the client-side. After trying a few others, I found the cuckoofilter crate, which supported serialization. The behavior is similar to Bloom filters, but if you’re interested in the differences, you can look at this summary. Here’s how to use it: let mut cf = cuckoofilter::new(); &#x2F;&#x2F; Add data to the filter let value: &amp;amp;str = &amp;quot;hello world&amp;quot;; let success = cf.add(value)?; &#x2F;&#x2F; Lookup if data was added before let success = cf.contains(value); &#x2F;&#x2F; success ==&amp;gt; true Let’s check the output size when bundling the filters for ten articles on my blog using cuckoo filters: ~&#x2F;C&#x2F;p&#x2F;tinysearch ❯❯❯ l storage Permissions Size User Date Modified Name .rw-r--r-- 44k mendler 24 Mar 15:42 storage 44kB doesn’t sound too shabby, but these are just the cuckoo filters for ten articles, serialized as a Rust binary. On top of that, we have to add the search functionality and the helper code. In total, the client-side code weighed in at 216kB using vanilla wasm-pack. Too much. Trimming Binary Size After the sobering first result of 216kB for our initial prototype, we have a few options to bring the binary size down. The first is following johnthagen’s advice on minimizing Rust binary size. By setting a few options in our Cargo.toml, we can shave off quite a few bytes: &amp;quot;opt-level = &amp;#39;z&amp;#39;&amp;quot; =&amp;gt; 249665 bytes &amp;quot;lto = true&amp;quot; =&amp;gt; 202516 bytes &amp;quot;opt-level = &amp;#39;s&amp;#39;&amp;quot; =&amp;gt; 195950 bytes Setting opt-level to s means we trade size for speed, but we’re preliminarily interested in minimal size anyway. After all, a small download size also improves performance. Next, we can try wee_alloc, an alternative Rust allocator producing a small .wasm code size. 
It is geared towards code that makes a handful of initial dynamically sized allocations, and then performs its heavy lifting without any further allocations. This scenario requires some allocator to exist, but we are more than happy to trade allocation performance for small code size. Exactly what we want. Let’s try! &amp;quot;wee_alloc and nightly&amp;quot; =&amp;gt; 187560 bytes We shaved off another 4% from our binary. Out of curiosity, I tried to set codegen-units to 1, meaning we only use a single thread for code generation. Surprisingly, this resulted in a slightly smaller binary size. &amp;quot;codegen-units = 1&amp;quot; =&amp;gt; 183294 bytes Then I got word of a Wasm optimizer called binaryen. On macOS, it’s available through homebrew: brew install binaryen It ships a binary called wasm-opt and that shaved off another 15%: &amp;quot;wasm-opt -Oz&amp;quot; =&amp;gt; 154413 bytes Then I removed web-sys as we don’t have to bind to the DOM: 152858 bytes. There’s a tool called twiggy to profile the code size of Wasm binaries. It printed the following output: twiggy top -n 20 pkg&#x2F;tinysearch_bg.wasm Shallow Bytes │ Shallow % │ Item ─────────────┼───────────┼──────────────────────────────── 79256 ┊ 44.37% ┊ data[0] 13886 ┊ 7.77% ┊ &amp;quot;function names&amp;quot; subsection 7289 ┊ 4.08% ┊ data[1] 6888 ┊ 3.86% ┊ core::fmt::float::float_to_decimal_common_shortest::hdd201d50dffd0509 6080 ┊ 3.40% ┊ core::fmt::float::float_to_decimal_common_exact::hcb5f56a54ebe7361 5972 ┊ 3.34% ┊ std::sync::once::Once::call_once:: closure ::ha520deb2caa7e231 5869 ┊ 3.29% ┊ search From what I can tell, the biggest chunk of our binary is occupied by the raw data section for our articles. Next up, we got the function headers and some float to decimal helper functions, that most likely come from deserialization. 
Finally, I tried wasm-snip, which replaces a WebAssembly function’s body with an unreachable like so, but it didn’t reduce code size: wasm-snip --snip-rust-fmt-code --snip-rust-panicking-code -o pkg&#x2F;tinysearch_bg_snip.wasm pkg&#x2F;tinysearch_bg_opt.wasm After tweaking with the parameters of the cuckoo filters a bit and removing stop words from the articles, I arrived at 121kB (51kB gzipped) — not bad considering the average image size on the web is around 900kB. On top of that, the search functionality only gets loaded when a user clicks into the search field. Update Recently I moved the project from cuckoofilters to XOR filters. I used the awesome xorf project, which comes with built-in serde serialization. which allowed me to remove a lot of custom code. With that, I could reduce the payload size by another 20-25% percent. I’m down to 99kB (49kB gzipped) on my blog now. 🎉 The new version is released on crates.io already, if you want to give it a try. Frontend- and Glue Code wasm-pack will auto-generate the JavaScript code to talk to Wasm. For the search UI, I customized a few JavaScript and CSS bits from w3schools. It even has keyboard support! Now when a user enters a search query, we go through the cuckoo filter of each article and try to match the words. The results are scored by the number of hits. Thanks to my dear colleague Jorge Luis Betancourt for adding that part. Video of the search functionality (Fun fact: this animation is about the same size as the uncompressed Wasm search itself.) Caveats Only whole words are matched. I would love to add prefix-search, but the binary became too big when I tried. Usage The standalone binary to create the Wasm file is called tinysearch. It expects a single path to a JSON file as an input: tinysearch path&#x2F;to&#x2F;corpus.json This corpus.json contains the text you would like to index. 
The format is pretty straightforward: [ &amp;quot;title&amp;quot;: &amp;quot;Article 1&amp;quot;, &amp;quot;url&amp;quot;: &amp;quot;https:&#x2F;&#x2F;example.com&#x2F;article1&amp;quot;, &amp;quot;body&amp;quot;: &amp;quot;This is the body of article 1.&amp;quot; , &amp;quot;title&amp;quot;: &amp;quot;Article 2&amp;quot;, &amp;quot;url&amp;quot;: &amp;quot;https:&#x2F;&#x2F;example.com&#x2F;article2&amp;quot;, &amp;quot;body&amp;quot;: &amp;quot;This is the body of article 2.&amp;quot; ] You can generate this JSON file with any static site generator. Here’s my version for Zola: % set section = get_section(path=&amp;quot;_index.md&amp;quot;) % [ %- for post in section.pages -% % if not post.draft % &amp;quot;title&amp;quot;: post.title | striptags | json_encode | safe , &amp;quot;url&amp;quot;: post.permalink | json_encode | safe , &amp;quot;body&amp;quot;: post.content | striptags | json_encode | safe % if not loop.last % , % endif % % endif % %- endfor -% ] I’m pretty sure that the Jekyll version looks quite similar. Here’s a starting point. If you get something working for your static site generator, please let me know. Observations This is still the wild west: unstable features, nightly Rust, documentation gets outdated almost every day. Bring your thinking cap! Creating a product out of a good idea is a lot of work. One has to pay attention to many factors: ease-of-use, generality, maintainability, documentation, and so on. Rust is very good at removing dead code, so you usually don’t pay for what you don’t use. I would still advise you to be very conservative about the dependencies you add to a Wasm binary because it’s tempting to add features that you don’t need and which will add to the binary size. For example, I used StructOpt during testing, and I had a main() function that was parsing these command-line arguments. This was not necessary for Wasm, so I removed it later. I understand that not everyone wants to write Rust code. 
It’s complicated to get started with, but the cool thing is that you can use almost any other language, too. For example, you can write Go code and transpile to Wasm, or maybe you prefer PHP or Haskell. There is support for many languages already. A lot of people dismiss WebAssembly as a toy technology. They couldn’t be further from the truth. In my opinion, WebAssembly will revolutionize the way we build products for the web and beyond. What was very hard just two years ago is now easy: shipping code in any language to every browser. I’m super excited about its future. If you’re looking for a standalone, self-hosted search index for your company website, check out sonic. Also check out stork as an alternative. Try it! The code for tinysearch is on Github. Please be aware of these limitations: Only searches for entire words. There are no search suggestions. The reason is that prefix search blows up binary size like Mentos and Diet Coke. Since we bundle all search indices for all articles into one static binary, I only recommend to use it for low- to medium-sized websites. Expect around 4kB (non-compressed) per article. The compile times are abysmal at the moment (around 1.5 minutes after a fresh install on my machine), mainly because we’re compiling the Rust crate from scratch every time we rebuild the index. Update: This is mostly fixed thanks to the awesome work of CephalonRho in PR #13. Thanks again! The final Wasm code is laser-fast because we save the roundtrips to a search-server. The instant feedback loop feels more like filtering a list than searching through posts. It can even work fully offline, which might be nice if you like to bundle it with an app. 
" + }, { "title": "Maybe You Don't Need Kubernetes", "url": "https://endler.dev/2019/maybe-you-dont-need-kubernetes/", - "body": "\n \n \n A woman riding a scooter\n \n Illustration created by freepik, Nomad logo by HashiCorp.\n \n\nKubernetes is the 800-pound gorilla of container orchestration.\nIt powers some of the biggest deployments worldwide, but it comes\nwith a price tag.\nEspecially for smaller teams, it can be time-consuming to maintain and has a\nsteep learning curve. For what our team of four wanted to achieve at trivago, it\nadded too much overhead. So we looked into alternatives — and fell in love with\nNomad.\nThe wishlist\nOur team runs a number of typical services for monitoring and performance\nanalysis: API endpoints for metrics written in Go, Prometheus exporters, log\nparsers like Logstash or Gollum, and databases like InfluxDB or Elasticsearch.\nEach of these services run in their own container. We needed a simple system to\nkeep those jobs running.\nWe started with a list of requirements for container orchestration:\n\nRun a fleet of services across many machines.\nProvide an overview of running services.\nAllow for communication between services.\nRestart them automatically when they die.\nBe manageable by a small team.\n\nOn top of that, the following things were nice to have but not strictly\nrequired:\n\nTag machines by their capabilities (e.g., label machines with fast disks for\nI/O heavy services.)\nBe able to run these services independently of any orchestrator (e.g. in\ndevelopment).\nHave a common place to share configurations and secrets.\nProvide an endpoint for metrics and logging.\n\nWhy Kubernetes was not a good fit for us\nWhen creating a prototype with Kubernetes, we noticed that we started adding\never-more complex layers of logic to operate our services. Logic on which we\nimplicitly relied on.\nAs an example, Kubernetes allows embedding service configurations using\nConfigMaps. 
Especially when merging multiple config files or\nadding more services to a pod, this can get quite confusing quickly.\nKubernetes - or helm, for that matter - allows injecting external configs\ndynamically to ensure separation of concerns. But this can\nlead to tight, implicit coupling between your project and Kubernetes.\nHelm and ConfigMaps are optional features so you don’t have to use them. You\nmight as well just copy the config into the Docker image. However, it’s tempting\nto go down that path and build unnecessary abstractions that can later bite you.\nOn top of that, the Kubernetes ecosystem is still rapidly evolving. It takes a\nfair amount of time and energy to stay up-to-date with the best practices and\nlatest tooling. Kubectl, minikube, kubeadm, helm, tiller, kops, oc - the list\ngoes on and on. Not all tools are necessary to get started with Kubernetes, but\nit’s hard to know which ones are, so you have to be at least aware of them.\nBecause of that, the learning curve is quite steep.\nWhen to use Kubernetes\nAt trivago specifically, many teams use Kubernetes and are quite happy with it.\nThese instances are managed by Google or Amazon however, which have the capacity to do so.\nKubernetes comes with amazing\nfeatures,\nthat make container orchestration at scale more manageable:\n\nFine-grained rights management\nCustom controllers allow getting logic into the cluster. These are just\nprograms that talk to the Kubernetes API.\nAutoscaling! Kubernetes can scale your services up and down on demand. It\nuses service metrics to do this without manual intervention.\n\nThe question is if you really need all those features. You can't rely on these\nabstractions to just work; you'll have to learn what's going on under the\nhood.\nEspecially in our team, which runs most services on-premise (because of its\nclose connection to trivago's core infrastructure), we didn't want to afford\nrunning our own Kubernetes cluster. 
We wanted to ship services instead.\n\n\n \n\n\nBatteries not included\nNomad is the 20% of service orchestration that gets you 80% of the way. All it\ndoes is manage deployments. It takes care of your rollouts and restarts your\ncontainers in case of errors, and that's about it.\nThe entire point of Nomad is that it does less: it doesn’t include\nfine-grained rights management or advanced network policies, and that’s by\ndesign. Those components are provided as enterprise services, by a third-party,\nor not at all.\nI think Nomad hit a sweet-spot between ease of use and expressiveness. It's good\nfor small, mostly independent services. If you need more control, you'll have to\nbuild it yourself or use a different approach. Nomad is just an orchestrator.\nThe best part about Nomad is that it's easy to replace. There is little to no\nvendor lock-in because the functionality it provides can easily be integrated\ninto any other system that manages services. It just runs as a plain old single\nbinary on every machine in your cluster; that's it!\nThe Nomad ecosystem of loosely coupled components\nThe real power of Nomad lies within its ecosystem. It integrates very well with\nother - completely optional - products like Consul (a key-value store) or\nVault (for secrets handling). Inside your Nomad file, you can have sections\nfor fetching data from those services:\n\ntemplate {\n data = <<EOH\nLOG_LEVEL="{{key "service/geo-api/log-verbosity"}}"\nAPI_KEY="{{with secret "secret/geo-api-key"}}{{.Data.value}}{{end}}"\nEOH\n\n destination = "secrets/file.env"\n env = true\n}\n\nThis will read the service/geo-api/log-verbosity key from Consul and expose it\nas a LOG_LEVEL environment variable inside your job. It's also exposing\nsecret/geo-api-key from Vault as API_KEY. Simple, but powerful!\nBecause it's so simple, Nomad can also be easily extended with other services\nthrough its API. For example, jobs can be tagged for service discovery. 
At\ntrivago, we tag all services, which expose metrics, with trv-metrics. This\nway, Prometheus finds the services via Consul and periodically scrapes the\n/metrics endpoint for new data. The same can be done for logs by integrating\nLoki for example.\nThere are many other examples for extensibility:\n\nTrigger a Jenkins job using a webhook and Consul watches to redeploy your\nNomad job on service config changes.\nUse Ceph to add a distributed file system to Nomad.\nUse fabio for load balancing.\n\nAll of this allowed us to grow our infrastructure organically without too much\nup-front commitment.\nFair warning\nNo system is perfect. I advise you not to use any fancy new features in\nproduction right now. There are bugs and missing features of course - but\nthat's also the case for\nKubernetes.\nCompared to Kubernetes, there is far less momentum behind Nomad. Kubernetes has\nseen around 75.000 commits and 2000 contributors so far, while Nomad sports about\n14.000 commits and 300 contributors. It will be hard for Nomad to keep up with\nthe velocity of Kubernetes, but maybe it doesn’t have to! The scope is much more\nnarrow and the smaller community could also mean that it'll be easier to get your \npull request accepted, in comparison to Kubernetes.\nSummary\nThe takeaway is: don't use Kubernetes just because everybody else does.\nCarefully evaluate your requirements and check which tool fits the bill.\nIf you're planning to deploy a fleet of homogenous services on large-scale\ninfrastructure, Kubernetes might be the way to go. Just be aware of the\nadditional complexity and operational costs. Some of these costs can be\navoided by using a managed Kubernetes environment like Google Kubernetes\nEngine or Amazon EKS.\nIf you're just looking for a reliable orchestrator that is easy to maintain and\nextendable, why not give Nomad a try? You might be surprised by how far it'll get you.\nIf Kubernetes were a car, Nomad would be a scooter. 
Sometimes you prefer one and\nsometimes the other. Both have their right to exist.\nCredits\nThanks to my awesome colleagues Esteban Barrios, Jorge-Luis Betancourt, Simon Brüggen, Arne Claus, Inga Feick, Wolfgang Gassler, Barnabas Kutassy, Perry Manuk, Patrick Pokatilo, and Jakub Sacha for reviewing drafts of this article.\n" + "body": " A woman riding a scooter Source: Illustration created by freepik, Nomad logo by HashiCorp. Kubernetes is the 800-pound gorilla of container orchestration. It powers some of the biggest deployments worldwide, but it comes with a price tag. Especially for smaller teams, it can be time-consuming to maintain and has a steep learning curve. For what our team of four wanted to achieve at trivago, it added too much overhead. So we looked into alternatives — and fell in love with Nomad. The Wishlist Our team runs a number of typical services for monitoring and performance analysis: API endpoints for metrics written in Go, Prometheus exporters, log parsers like Logstash or Gollum, and databases like InfluxDB or Elasticsearch. Each of these services run in their own container. We needed a simple system to keep those jobs running. We started with a list of requirements for container orchestration: Run a fleet of services across many machines. Provide an overview of running services. Allow for communication between services. Restart them automatically when they die. Be manageable by a small team. On top of that, the following things were nice to have but not strictly required: Tag machines by their capabilities (e.g., label machines with fast disks for I&#x2F;O heavy services.) Be able to run these services independently of any orchestrator (e.g. in development). Have a common place to share configurations and secrets. Provide an endpoint for metrics and logging. Why Kubernetes Was Not A Good Fit For Us When creating a prototype with Kubernetes, we noticed that we started adding ever-more complex layers of logic to operate our services. 
Logic on which we implicitly relied on. As an example, Kubernetes allows embedding service configurations using ConfigMaps. Especially when merging multiple config files or adding more services to a pod, this can get quite confusing quickly. Kubernetes - or helm, for that matter - allows injecting external configs dynamically to ensure separation of concerns. But this can lead to tight, implicit coupling between your project and Kubernetes. Helm and ConfigMaps are optional features so you don’t have to use them. You might as well just copy the config into the Docker image. However, it’s tempting to go down that path and build unnecessary abstractions that can later bite you. On top of that, the Kubernetes ecosystem is still rapidly evolving. It takes a fair amount of time and energy to stay up-to-date with the best practices and latest tooling. Kubectl, minikube, kubeadm, helm, tiller, kops, oc - the list goes on and on. Not all tools are necessary to get started with Kubernetes, but it’s hard to know which ones are, so you have to be at least aware of them. Because of that, the learning curve is quite steep. When To Use Kubernetes At trivago specifically, many teams use Kubernetes and are quite happy with it. These instances are managed by Google or Amazon however, which have the capacity to do so. Kubernetes comes with amazing features, that make container orchestration at scale more manageable: Fine-grained rights management Custom controllers allow getting logic into the cluster. These are just programs that talk to the Kubernetes API. Autoscaling! Kubernetes can scale your services up and down on demand. It uses service metrics to do this without manual intervention. The question is if you really need all those features. You can’t rely on these abstractions to just work; you’ll have to learn what’s going on under the hood. 
Especially in our team, which runs most services on-premise (because of its close connection to trivago’s core infrastructure), we didn’t want to afford running our own Kubernetes cluster; we wanted to ship services instead. Batteries Not Included Nomad is the 20% of service orchestration that gets you 80% of the way. All it does is manage deployments. It takes care of your rollouts and restarts your containers in case of errors, and that’s about it. The entire point of Nomad is that it does less: it doesn’t include fine-grained rights management or advanced network policies, and that’s by design. Those components are provided as enterprise services, by a third-party — or not at all. I think Nomad hit a sweet-spot between ease of use and expressiveness. It’s good for small, mostly independent services. If you need more control, you’ll have to build it yourself or use a different approach. Nomad is just an orchestrator. The best part about Nomad is that it’s easy to replace. There is little to no vendor lock-in because the functionality it provides can easily be integrated into any other system that manages services. It just runs as a plain old single binary on every machine in your cluster; that’s it! The Nomad Ecosystem Of Loosely Coupled Components The real power of Nomad lies within its ecosystem. It integrates very well with other - completely optional - products like Consul (a key-value store) or Vault (for secrets handling). Inside your Nomad file, you can have sections for fetching data from those services: template data = &amp;lt;&amp;lt;EOH LOG_LEVEL=&amp;quot; key &amp;quot;service&#x2F;geo-api&#x2F;log-verbosity&amp;quot; &amp;quot; API_KEY=&amp;quot; with secret &amp;quot;secret&#x2F;geo-api-key&amp;quot; .Data.value end &amp;quot; EOH destination = &amp;quot;secrets&#x2F;file.env&amp;quot; env = true This will read the service&#x2F;geo-api&#x2F;log-verbosity key from Consul and expose it as a LOG_LEVEL environment variable inside your job. 
It’s also exposing secret&#x2F;geo-api-key from Vault as API_KEY. Simple, but powerful! Because it’s so simple, Nomad can also be easily extended with other services through its API. For example, jobs can be tagged for service discovery. At trivago, we tag all services, which expose metrics, with trv-metrics. This way, Prometheus finds the services via Consul and periodically scrapes the &#x2F;metrics endpoint for new data. The same can be done for logs by integrating Loki for example. There are many other examples for extensibility: Trigger a Jenkins job using a webhook and Consul watches to redeploy your Nomad job on service config changes. Use Ceph to add a distributed file system to Nomad. Use fabio for load balancing. All of this allowed us to grow our infrastructure organically without too much up-front commitment. Fair Warning No system is perfect. I advise you not to use any fancy new features in production right now. There are bugs and missing features of course - but that’s also the case for Kubernetes. Compared to Kubernetes, there is far less momentum behind Nomad. Kubernetes has seen around 75.000 commits and 2000 contributors so far, while Nomad sports about 14.000 commits and 300 contributors. It will be hard for Nomad to keep up with the velocity of Kubernetes, but maybe it doesn’t have to! The scope is much more narrow and the smaller community could also mean that it’ll be easier to get your pull request accepted, in comparison to Kubernetes. Summary The takeaway is: don’t use Kubernetes just because everyone else does. Carefully evaluate your requirements and check which tool fits the bill. If you’re planning to deploy a fleet of homogenous services on large-scale infrastructure, Kubernetes might be the way to go. Just be aware of the additional complexity and operational costs. Some of these costs can be avoided by using a managed Kubernetes environment like Google Kubernetes Engine or Amazon EKS. 
If you’re just looking for a reliable orchestrator that is easy to maintain and extendable, why not give Nomad a try? You might be surprised by how far it’ll get you. If Kubernetes were a car, Nomad would be a scooter. Sometimes you prefer one and sometimes the other. Both have their right to exist. " }, { "title": "What Is Rust Doing Behind the Curtains?", "url": "https://endler.dev/2018/cargo-inspect/", - "body": "Rust allows for a lot of syntactic sugar, that makes it a pleasure to write. It is sometimes hard, however, to look behind the curtain and see what the compiler is really doing with our code.\n\nAt Rust Belt Rust 2018, I saw a talk by Tshepang Lekhonkhobe titled Syntax conveniences afforded by the compiler (Recording here).\nTo quote the abstract:\n\nThe Rust compiler provides a number of conveniences that make life easier for its users. It is good to know what these are, to avoid being mystified by what's going on under the hood... the less magical thinking we have of the world, the better.\n\nHe goes on to give a few examples of these conveniences:\n\nlifetime elisions\ntype inference\nsyntactic sugar\nimplicit dereferencing\ntype coercions\nhidden code (e.g. the prelude)\n\nIt was very educational and fun to see him compare code with and without these conveniences during the talk.\nComing home, I wanted to learn more. I wondered if there was a tool, which revealed what Rust was doing behind the curtains.\nOver on Reddit, I found a discussion about compiler flags to produce desugared output.\n(Note that I'm using rustup here to trigger the nightly compiler with the +nightly flag.)\n\nrustc +nightly -Zunpretty=hir example.rs\n\nHIR stands for high-level intermediate representation. This is basically an abstract syntax tree (AST) more suited for use by the compiler. 
It replaces syntactic sugar with basic building blocks that are easier to handle by the following compile steps.\nTo find out more, read this detailed write-up by Nico Matsakis.\n\nAnyway, the output looked surprisingly readable (see below). With some syntax highlighting and formatting, this could be quite a handy tool.\nI tried to use rustfmt on it, and it worked unreasonably well.\nMotivated by this quick win, I wrapped it up in a cargo subcommand and called it cargo-inspect.\nLet's try cargo-inspect on some real code!\nExample - Desugaring a range expression\nThe following examples can also be found in the project's examples folder.\nInput:\n\nfor n in 1..3 {\n println!("{}", n);\n}\n\nOutput of cargo-inspect:\n\nThat's the neatly formatted terminal output. It sports line numbers and colors, thanks to prettyprint, which is a library on top of bat. Maybe you can't read that, so here's the gist of it:\n\nmatch ::std::iter::IntoIterator::into_iter(\n ::std::ops::Range { start: 1, end: 3 })\n mut iter => loop {\n // ...\n },\n};\n\nWe can see that 1..3 gets converted into std::ops::Range { start: 1, end: 3 }.\nTo the compiler backend, these are absolutely the same. 
So this holds:\n\nassert_eq!((1..3), std::ops::Range { start: 1, end: 3 });\nExample - File handling\nInput:\n\nuse std::fs::File;\nuse std::io::Error;\n\nfn main() -> Result<(), Error> {\n let file = File::open("file.txt")?;\n Ok(())\n}\n\nOutput:\n\n#[prelude_import]\nuse std::prelude::v1::*;\n#[macro_use]\nextern crate std;\nuse std::fs::File;\nuse std::io::Error;\n\nfn main() -> Result<(), Error> {\n let file = match ::std::ops::Try::into_result(\n <File>::open("file.txt")) {\n ::std::result::Result::Err(err) =>\n #[allow(unreachable_code)]\n {\n #[allow(unreachable_code)]\n return ::std::ops::Try::from_error(\n ::std::convert::From::from(err))\n }\n ::std::result::Result::Ok(val) =>\n #[allow(unreachable_code)]\n {\n #[allow(unreachable_code)]\n val\n }\n };\n Ok(())\n}\n\nWe can see that the carrier operator ? gets desugared into a match on the Result of File::open. In case of an error, We apply std::convert::From::from to convert between error types. Otherwise, we simply return the Ok value.\nFuture work\nI'm not planning to rewrite the compiler here. rustc is doing a far greater job than I could. All this functionality already existed before; I'm merely trying to make the compiler more approachable for learners like me.\nRight now, the tool is quite fragile. It throws ugly error messages when things go wrong.\nIt mostly shines, when you run it on small, isolated example snippets. \nGet involved!\nOver on Github, I opened up a few issues for others to get involved.\nNamely, I wish there were options to:\n\n✅ Make it work with cargo projects.\n✅ Show the original code above the desugared code.\nShow only part of the full output\n...and much more.\n\nAlso, if you find a particularly exciting code example, don't be shy to contribute it to the examples folder.\n" + "body": "Rust allows for a lot of syntactic sugar, that makes it a pleasure to write. It is sometimes hard, however, to look behind the curtain and see what the compiler is really doing with our code. 
At Rust Belt Rust 2018, I saw a talk by Tshepang Lekhonkhobe titled Syntax conveniences afforded by the compiler (Recording here). To quote the abstract: The Rust compiler provides a number of conveniences that make life easier for its users. It is good to know what these are, to avoid being mystified by what’s going on under the hood… the less magical thinking we have of the world, the better. He goes on to give a few examples of these conveniences: lifetime elisions type inference syntactic sugar implicit dereferencing type coercions hidden code (e.g. the prelude) It was very educational and fun to see him compare code with and without these conveniences during the talk. Coming home, I wanted to learn more. I wondered if there was a tool that revealed what Rust was doing behind the curtains. Over on Reddit, I found a discussion about compiler flags to produce desugared output. (Note that I’m using rustup here to trigger the nightly compiler with the +nightly flag.) rustc +nightly -Zunpretty=hir example.rs HIR stands for high-level intermediate representation. This is basically an abstract syntax tree (AST) more suited for use by the compiler. It replaces syntactic sugar with basic building blocks that are easier to handle by the following compile steps. To find out more, read this detailed write-up by Niko Matsakis. Anyway, the output looked surprisingly readable (see below). With some syntax highlighting and formatting, this could be quite a handy tool. I tried to use rustfmt on it, and it worked unreasonably well. Motivated by this quick win, I wrapped it up in a cargo subcommand and called it cargo-inspect. Let’s try cargo-inspect on some real code! Example - Desugaring a range expression The following examples can also be found in the project’s examples folder. Input: for n in 1..3 println!(&amp;quot; &amp;quot;, n); Output of cargo-inspect: That’s the neatly formatted terminal output. 
It sports line numbers and colors, thanks to prettyprint, which is a library on top of bat. Maybe you can’t read that, so here’s the gist of it: match ::std::iter::IntoIterator::into_iter( ::std::ops::Range start: 1, end: 3 ) mut iter =&amp;gt; loop &#x2F;&#x2F; ... , ; We can see that 1..3 gets converted into std::ops::Range start: 1, end: 3 . To the compiler backend, these are absolutely the same. So this holds: assert_eq!((1..3), std::ops::Range start: 1, end: 3 ); Example - File handling Input: use std::fs::File; use std::io::Error; fn main() -&amp;gt; Result&amp;lt;(), Error&amp;gt; let file = File::open(&amp;quot;file.txt&amp;quot;)?; Ok(()) Output: #[prelude_import] use std::prelude::v1::*; #[macro_use] extern crate std; use std::fs::File; use std::io::Error; fn main() -&amp;gt; Result&amp;lt;(), Error&amp;gt; let file = match ::std::ops::Try::into_result( &amp;lt;File&amp;gt;::open(&amp;quot;file.txt&amp;quot;)) ::std::result::Result::Err(err) =&amp;gt; #[allow(unreachable_code)] #[allow(unreachable_code)] return ::std::ops::Try::from_error( ::std::convert::From::from(err)) ::std::result::Result::Ok(val) =&amp;gt; #[allow(unreachable_code)] #[allow(unreachable_code)] val ; Ok(()) We can see that the carrier operator ? gets desugared into a match on the Result of File::open. In case of an error, We apply std::convert::From::from to convert between error types. Otherwise, we simply return the Ok value. Talk Over at FOSDEM in Belgium, I was able to speak about the project in detail. Here is the recording: document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Future work I’m not planning to rewrite the compiler here. rustc is doing a far greater job than I could. All this functionality already existed before; I’m merely trying to make the compiler more approachable for learners like me. Right now, the tool is quite fragile. It throws ugly error messages when things go wrong. 
It mostly shines, when you run it on small, isolated example snippets. Get involved! Over on Github, I opened up a few issues for others to get involved. Namely, I wish there were options to: Make it work with cargo projects. Show the original code above the desugared code. Show only part of the full output …and much more. Also, if you find a particularly exciting code example, don’t be shy to contribute it to the examples folder. " }, { "title": "The Unreasonable Effectiveness of Excel Macros", "url": "https://endler.dev/2018/excel/", - "body": "I never was a big fan of internships, partially because all the exciting\ncompanies were far away from my little village in Bavaria and partially because\nI was too shy to apply.\nOnly once I applied for an internship in Ireland as part of a school program.\nOur teacher assigned the jobs and so my friend got one at Apple and I ended up\nat a medium-sized IT distributor — let's call them PcGo.\n\nJudging by the website, the company looked quite impressive, but in reality, it\nwas just a secluded, grey warehouse in the rainy industrial area of Cork. Upon\narrival, I was introduced to my colleague Evgeny, who was the main (and only)\nemployee responsible for assembling desktop computers. From what I can tell, he\nran the shop. He just spoke broken English, so he handed me an electric\nscrewdriver and a box of screws, and I got to work. Together we assembled a lot\nof computers in my first week, and we had a lot of fun. One day he drove me home\nfrom work because I missed my bus. It was a rainy day and while he was driving\nthrough the narrow streets of Cork we talked and laughed, but all of a sudden I\nheard a loud bang. I looked through the rear mirror only to find that there was\nno rear mirror anymore. Turns out he bumped into another car, and the thing went\noff. Evgeny didn't mind. 
In a thick Eastern-European accent he remarked "Lost\nthree mirrors before already," and kept driving.\nIn my second week, I had a visit from my boss. Apparently, I was done with the\nworkload that they planned for my three-week internship. I was used to\nassembling and installing computers, which explains why.\nTo keep me busy, they put together another task. On an old Windows 98 computer\nin the back, he pointed the browser to silverpages.ie, searched for "computer"\nand after a while we looked at an endless list of addresses of Irish companies\nhaving "something to do with computers." Each entry consisted of the expected\nfields: the company name, the address, the phone number, the website (if any)\nand a list of keywords.\nMy boss said that they needed an overview of all competing vendors. He carefully\nselected a field from an entry, copied it and pasted it into an Excel sheet. He\ndid the same for the remaining fields. "That's it!", he said with a fake smile.\nWe both knew that this would mean two boring weeks for me.\nThey wanted to keep me busy by letting me manually scrape the entirety of a web database.\nI could have taken that as an insult, but instead, I looked at it as a\nchallenge.\nI noticed that the page number on silverpages.ie could be controlled by a GET\nparameter.\n"Can I write a program that does the scraping?" My boss was noticeably puzzled.\n"Uhm... you can do whatever you want, but you're not allowed to install any\nadditional software!". With that, he was off.\nJudging from the installed programs, I wasn't left with many choices: Excel or\nMinesweeper. I knew that Excel's Visual Basic macros were quite powerful, but I\nwasn't sure if I could scrape a full website with it.\nAfter a while, I detected a feature to download a website into an Excel sheet\n(what a glorious functionality). 
This worked perfectly, so all I had to do was\nrecord a macro to create a temporary sheet for each page, copy all important\nfields into a "master slide" and then get rid of the temporary sheet. I recorded\nthe macro and looked at the code. The rest of the day was spent figuring out how\nto modify the URL in a loop and cleaning up the macro. I pressed the "run macro"\nbutton and then I sat there waiting. The computer was running at full speed. My\nbiggest fear was that the program would crash or that the computer would run out\nof memory. I refrained from playing minesweeper on it, so I mostly played pool\nor chatted with Evgeny.\nWhen I came to the office the next morning, my program was done. To my surprise, it scraped the entirety\nof SilverPages, and there were many thousands of entries in the list. I sent the\ndocument to my boss via E-Mail and then got back to playing minesweeper.\nAn hour later, three guys with suits were standing behind me. I had to show them\nthe list again. They couldn't believe I did that on my own, so I showed them the\ntool to scrape the data. For them, I had some sort of superpower.\nThey left without giving me another task; I was free to do whatever I wanted for\nthe remaining two weeks. I went on to write an inventory tool for them, which\nthey could also manage via Excel. It was just a glorious Excel form for a\nspreadsheet that they maintained manually. I spent two weeks of my summer\nvacation to finish that tool because they said they would pay me for that, which, of course, they didn't :).\nLessons learned\n\nNever underestimate the power of Excel macros.\nIf you have a boring task at hand, make it a challenge for yourself by adding\na handicap.\n\n" + "body": "I never was a big fan of internships, partially because all the exciting companies were far away from my little village in Bavaria and partially because I was too shy to apply. Only once I applied for an internship in Ireland as part of a school program. 
Our teacher assigned the jobs and so my friend got one at Apple and I ended up at a medium-sized IT distributor — let’s call them PcGo. Judging by the website, the company looked quite impressive, but in reality, it was just a secluded, grey warehouse in the rainy industrial area of Cork. Upon arrival, I was introduced to my colleague Evgeny, who was the main (and only) employee responsible for assembling desktop computers. From what I can tell, he ran the shop. He just spoke broken English, so he handed me an electric screwdriver and a box of screws, and I got to work. Together we assembled a lot of computers in my first week, and we had a lot of fun. One day he drove me home from work because I missed my bus. It was a rainy day and while he was driving through the narrow streets of Cork we talked and laughed, but all of a sudden I heard a loud bang. I looked through the rear mirror only to find that there was no rear mirror anymore. Turns out he bumped into another car, and the thing went off. Evgeny didn’t mind. In a thick Eastern-European accent he remarked “Lost three mirrors before already,” and kept driving. In my second week, I had a visit from my boss. Apparently, I was done with the workload that they planned for my three-week internship. I was used to assembling and installing computers, which explains why. To keep me busy, they put together another task. On an old Windows 98 computer in the back, he pointed the browser to silverpages.ie, searched for “computer” and after a while we looked at an endless list of addresses of Irish companies having “something to do with computers.” Each entry consisted of the expected fields: the company name, the address, the phone number, the website (if any) and a list of keywords. My boss said that they needed an overview of all competing vendors. He carefully selected a field from an entry, copied it and pasted it into an Excel sheet. He did the same for the remaining fields. “That’s it!”, he said with a fake smile. 
We both knew that this would mean two boring weeks for me. They wanted to keep me busy by letting me manually scrape the entirety of a web database. I could have taken that as an insult, but instead, I looked at it as a challenge. I noticed that the page number on silverpages.ie could be controlled by a GET parameter. “Can I write a program that does the scraping?” My boss was noticeably puzzled. “Uhm… you can do whatever you want, but you’re not allowed to install any additional software!”. With that, he was off. Judging from the installed programs, I wasn’t left with many choices: Excel or Minesweeper. I knew that Excel’s Visual Basic macros were quite powerful, but I wasn’t sure if I could scrape a full website with it. After a while, I detected a feature to download a website into an Excel sheet (what a glorious functionality). This worked perfectly, so all I had to do was record a macro to create a temporary sheet for each page, copy all important fields into a “master slide” and then get rid of the temporary sheet. I recorded the macro and looked at the code. The rest of the day was spent figuring out how to modify the URL in a loop and cleaning up the macro. I pressed the “run macro” button and then I sat there waiting. The computer was running at full speed. My biggest fear was that the program would crash or that the computer would run out of memory. I refrained from playing minesweeper on it, so I mostly played pool or chatted with Evgeny. When I came to the office the next morning, my program was done. To my surprise, it scraped the entirety of SilverPages, and there were many thousands of entries in the list. I sent the document to my boss via E-Mail and then got back to playing minesweeper. An hour later, three guys with suits were standing behind me. I had to show them the list again. They couldn’t believe I did that on my own, so I showed them the tool to scrape the data. For them, I had some sort of superpower. 
They left without giving me another task; I was free to do whatever I wanted for the remaining two weeks. I went on to write an inventory tool for them, which they could also manage via Excel. It was just a glorious Excel form for a spreadsheet that they maintained manually. I spent two weeks of my summer vacation to finish that tool because they said they would pay me for that, which, of course, they didn’t :). Lessons learned Never underestimate the power of Excel macros. If you have a boring task at hand, make it more challenging by adding constraints. " }, { "title": "Switching from a German to a US Keyboard Layout - Is It Worth It?", "url": "https://endler.dev/2018/keyboard/", - "body": "For the first three decades of my life, I've used a German keyboard layout.\nA few months ago, I switched to a US layout.\nThis post summarizes my thoughts around the topic.\nI was looking for a similar article before jumping the gun, but I couldn't find one — so I'll try to fill this gap.\nWhy switch?\nI was reasonably efficient when writing prose, but felt like \na lemur on a piano when programming: reaching the special keys ({, ;, or /)\nrequired lots of finger-stretching.\n\n \n \n German Keyboard Layout\n \n Image by Wikipedia\n \n\nHere's Wikipedia's polite\nexplanation why the\nGerman keyboard sucks for programming:\n\nLike many other non-American keyboards, German keyboards change the right Alt\nkey into an Alt Gr key to access a third level of key assignments. This is\nnecessary because the umlauts and some other special characters leave no room\nto have all the special symbols of ASCII, needed by programmers among others,\navailable on the first or second (shifted) levels without unduly increasing\nthe size of the keyboard.\n\nWhy switch now?\nAfter many years of using a rubber-dome Logitech Cordless Desktop\nWave, I\nhad to get a mechanical keyboard again. \nThose rubber domes just feel too mushy to me now. 
In addition to that, I enjoy the\nclicky sound of a mechanical keyboard and the noticeable tactile bump. (I'm using\nCherry MX Brown Keys with O-Ring dampeners to contain the anger of my coworkers.)\nMost mechanical keyboards come with an ANSI US layout only, so I figured, I'd\nfinally make the switch.\n\n \n \n Picture of my lovely keyboard\n \n\nHow long did it take to get accustomed to the new layout?\nWorking as a Software Engineer, my biggest fear was, that the switch would slow\ndown my daily work. This turned out not to be true. I was reasonably productive\nfrom day one and nobody even noticed any difference. (That's a good thing,\nright?)\nAt first, I didn't like the bar-shaped US-Return key. I preferred the European\nlayout with a vertical enter key. I was afraid that I would hit the key by\naccident. After a while, I find that the US return key to be even more convenient. \nI never hit it by accident.\nWithin two weeks, I was back to 100% typing speed.\nDid my programming speed improve noticeably?\nYup.\nEspecially when using special characters (/, ;, {, and so on) I'm much\nfaster now; partly because the key locations feel more intuitive, but mainly\nbecause my fingers stay at their dedicated positions now.\nSomehow the position of special characters feels right. I can now understand the\nreason why Vim is using / for search or why the pipe symbol is |: both are\neasy to reach! It all makes sense now!\n(For a fun time, try that on a German keyboard!)\nI now understand why Mircosoft chose \\ as a directory separator: it's easily\naccessible from a US keyboard. On the German layout, it's… just… awful\n(Alt Gr+ß on Windows, Shift + Option + 7 on Mac).\nThe opening curly brace on a German layout Mac is produced with Alt+8, which\nalways made me leave the home\nrow and break my typing\nflow. Now there are dedicated keys for parentheses. 
Such a relief!\nAm I slower when writing German texts now?\nIn the beginning, I was.\nSomehow my brain associated the German layout with German\ntexts. First, I used the macOS layout switcher.\nThis turned out to be cumbersome and take time.\nThen I found the "US with Umlauts via Option Key\nLayout". It works perfectly fine for\nme. It allows me to use a single Keyboard layout but insert German umlauts at will\n(e.g. ö is Option+o). There is probably a similar layout for other language combinations.\nIs switching between keyboards painful?\n\n \n \n US keyboard layout\n \n Wikipedia\n \n\nMy built-in MacBook Pro keyboard layout is still German. I was afraid, that switching between\nthe internal German and the external English keyboard would confuse me. This\nturned out not to be a problem. I rarely look at the print anyway.\nSummary\nIf you consider switching, just do it. I don't look back at all.\nThanks to Simon Brüggen for reviewing drafts of the article.\n" + "body": "For the first three decades of my life, I’ve exclusively used a German keyboard layout for programming. In 2018, I finally switched to a US layout. This post summarizes my thoughts around the topic. I was looking for a similar article before jumping the gun, but I couldn’t find one — so I wrote it. My current keyboard (as of April 2021), the low-profile, tenkeyless Keychron K1 is close to my favorite input device. Yes, I got the RGB version. — Amazon referral link. Why Switch To the US Layout? I was reasonably efficient when writing prose, but felt like a lemur on a piano when programming: lots of finger-stretching while trying to reach the special keys like , ;, or &#x2F;. German Keyboard Layout Source: Image by Wikipedia Here’s Wikipedia’s polite explanation why the German keyboard sucks for programming: Like many other non-American keyboards, German keyboards change the right Alt key into an Alt Gr key to access a third level of key assignments. 
This is necessary because the umlauts and some other special characters leave no room to have all the special symbols of ASCII, needed by programmers among others, available on the first or second (shifted) levels without unduly increasing the size of the keyboard. But Why Switch Now? After many years of using a rubber-dome Logitech Cordless Desktop Wave, I had to get a mechanical keyboard again. Those rubber domes just feel too mushy to me now. In addition to that, I enjoy the clicky sound of a mechanical keyboard and the noticeable tactile bump. (I’m using Cherry MX Brown Keys with O-Ring dampeners to contain the anger of my coworkers.) Most mechanical keyboards come with an ANSI US layout only, so I figured, I’d finally make the switch. My first mechanical keyboard — Durgod Taurus K320 (referral link). They also have a fancy white-pink ISO version now. How Long Did It Take To Get Accustomed To The New Layout? Working as a Software Engineer, my biggest fear was that the switch would slow down my daily work. This turned out not to be true. I was reasonably productive from day one, and nobody even noticed any difference. (That’s a good thing, right?) At first, I didn’t like the bar-shaped US-Return key. I preferred the European layout with a vertical enter key. I was afraid that I would hit the key by accident. After a while, I found the US return key to be even more convenient. I never hit it by accident, and it’s easy to reach with my pinky from the home position. Within two weeks, I was back to 100% typing speed. Did My Programming Speed Improve Noticeably? Yup. I’d say I can type programs about 30% faster. Especially when using special characters (&#x2F;, ;, {, and so on) I’m much faster now; partly because the key locations feel more intuitive, but mainly because my fingers stay at their dedicated positions. Somehow the position of special characters feels just right. 
I can finally understand the reason why Vim is using &#x2F; for search or why the pipe symbol is |: both are easy to reach! It all makes sense! (For a fun time, try that on a German keyboard!) I understand why Microsoft chose \\ as a directory separator: it’s easily accessible from a US keyboard. On the German layout, it’s… just… awful (Alt Gr+ß on Windows, Shift + Option + 7 on Mac). The opening curly brace on a German layout Mac is produced with Alt+8, which always made me leave the home row and break my typing flow. Now there are dedicated keys for parentheses. Such a relief! Update: It also helps greatly when looking up hotkeys for IDEs, text editors, photo editors, etc. because some programs remap shortcuts for the German market, which means that all the English documentation is totally worthless. Thanks to the English layout, I can just use the shortcuts mentioned and move on with my life. Am I Slower When Writing German Texts Now? In the beginning, I was. Somehow my brain associated the German layout with German texts. First, I used the macOS layout switcher. This turned out to be cumbersome and time-consuming. Then I found the “US with Umlauts via Option Key Layout”. It works perfectly fine for me. It allows me to use a single Keyboard layout but insert German umlauts at will (e.g. ö is Option+o). There is probably a similar layout for other language combinations. Stefan Imhoff notified me that there’s also a Karabiner rule which does the same. Might come in handy in case you already use this tool. Is Switching Between Keyboards Painful? US keyboard layout Source: Wikipedia My built-in MacBook Pro keyboard layout is still German. I was afraid that switching between the internal German and the external English keyboard would confuse me. This turned out not to be a problem. I rarely look at the print anyway. (Update: can’t remember when I last looked at the print.) How Often Do You Switch Back To A German Layout Now? Never. 
My Girlfriend has a German keyboard and every time I have to use it, I switch to the US layout. It makes her very happy when I do this and forget to switch back to German when I’m done. Summary If you consider switching, just do it! I don’t look back at all and apart from the initial transition period, I still couldn’t find any downsides. Since posting this article, many of my friends made the switch as well and had similar experiences: Wolfgang Gassler wrote a post about his layout switch on Linux Discussion of the article on Twitter " }, { "title": "fastcat - A Faster `cat` Implementation Using Splice", "url": "https://endler.dev/2018/fastcat/", - "body": "\nLots of people asked me to write another piece about the internals of well-known\nUnix commands. Well, actually, nobody asked me, but it makes for a good\nintro. I'm sure you’ve read the previous parts about yes and\nls — they are epic.\nAnyway, today we talk about cat, which is used to concatenate files - or, more\ncommonly, abused to print a file's contents to the screen.\n\n# Concatenate files, the intended purpose\ncat input1.txt input2.txt input3.txt > output.txt\n\n# Print file to screen, the most common use case\ncat myfile\nImplementing cat\nHere's a naive cat in Ruby:\n\n#!/usr/bin/env ruby\n\ndef cat(args)\n args.each do |arg|\n IO.foreach(arg) do |line|\n puts line\n end\n end\nend\n\ncat(ARGV)\n\nThis program goes through each file and prints its contents line by line.\nEasy peasy! But wait, how fast is this tool?\nI quickly created a random 2 GB file for the benchmark.\nLet's compare the speed of our naive implementation with the system one\nusing the awesome pv (Pipe Viewer) tool.\nAll tests are averaged over five runs on a warm cache (file in memory).\n\n# Ruby 2.5.1\n> ./rubycat myfile | pv -r > /dev/null\n[196MiB/s]\n\nNot bad, I guess? How does it compare with my system's cat?\n\ncat myfile | pv -r > /dev/null\n[1.90GiB/s]\n\nUh oh, GNU cat is ten times faster than our little Ruby cat. 
🐌\nMaking our Ruby cat a little faster\nOur naive Ruby code can be tweaked a bit.\nTurns out line buffering hurts performance in the end1:\n\n#!/usr/bin/env ruby\n\ndef cat(args)\n args.each do |arg|\n IO.copy_stream(arg, STDOUT)\n end\nend\n\ncat(ARGV)\n\nrubycat myfile | pv -r > /dev/null\n[1.81GiB/s]\n\nWow... we didn't really try hard, and we're already approaching the speed of a\ntool that gets optimized since\n1971. 🎉\nBut before we celebrate too much, let's see if we can go even faster.\nSplice\nWhat initially motivated me to write about cat was this comment by user\nwahern on\nHackerNews:\n\nI'm surprised that neither GNU yes nor GNU cat uses splice(2).\n\nCould this splice thing make printing files even faster? — I was intrigued.\nSplice was first introduced to the Linux Kernel in 2006, and there is a nice\nsummary from Linus Torvalds himself,\nbut I prefer the description from the manpage:\n\nsplice() moves data between two file descriptors without copying\nbetween kernel address space and user address space. 
It transfers up\nto len bytes of data from the file descriptor fd_in to the file\ndescriptor fd_out, where one of the file descriptors must refer to a\npipe.\n\nIf you really want to dig deeper, here's the corresponding source code from the\nLinux Kernel,\nbut we don't need to know all the nitty-gritty details for now.\nInstead, we can just inspect the header from the C implementation:\n\n#include <fcntl.h>\n\nssize_t splice (int fd_in, loff_t *off_in, int fd_out,\n loff_t *off_out, size_t len,\n unsigned int flags);\n\nTo break it down even more, here's how we would copy the entire src file to dst:\n\nconst ssize_t r = splice (src, NULL, dst, NULL, size, 0);\n\nThe cool thing about this is that all of it happens inside the Linux kernel, which means we won't copy a single byte to userspace (where our program runs).\nIdeally, splice works by remapping pages and does not actually copy\nany data, which may improve I/O performance\n(reference).\n\n \n \n \n File icon by Aleksandr Vector from the Noun Project. Terminal icon by useiconic.com from the Noun Project.\n \n\nUsing splice from Rust\nI have to say I'm not a C programmer and I prefer Rust because it offers a safer\ninterface. Here's the same thing in Rust:\n\n#[cfg(any(target_os = "linux", target_os = "android"))]\npub fn splice(\n fd_in: RawFd,\n off_in: Option<&mut libc::loff_t>,\n fd_out: RawFd,\n off_out: Option<&mut libc::loff_t>,\n len: usize,\n flags: SpliceFFlags,\n) -> Result<usize>\n\nNow I didn't implement the Linux bindings myself. 
Instead, I just used a library called\nnix, which provides Rust friendly bindings to *nix APIs.\nThere is one caveat, though:\nWe cannot really copy the file directly to standard out, because splice\nrequires one file descriptor to be a pipe.\nThe way around that is to create a pipe, which consists of a reader and a\nwriter (rd and wr).\nWe pipe the file into the writer, and then we read from the pipe and push the data to stdout.\nYou can see that I use a relatively big buffer of 16384 bytes (214) to improve performance.\n\nextern crate nix;\n\nuse std::env;\nuse std::fs::File;\nuse std::io;\nuse std::os::unix::io::AsRawFd;\n\nuse nix::fcntl::{splice, SpliceFFlags};\nuse nix::unistd::pipe;\n\nconst BUF_SIZE: usize = 16384;\n\nfn main() {\n for path in env::args().skip(1) {\n let input = File::open(&path).expect(&format!("fcat: {}: No such file or directory", path));\n let (rd, wr) = pipe().unwrap();\n let stdout = io::stdout();\n let _handle = stdout.lock();\n\n loop {\n let res = splice(\n input.as_raw_fd(),\n None,\n wr,\n None,\n BUF_SIZE,\n SpliceFFlags::empty(),\n ).unwrap();\n\n if res == 0 {\n // We read 0 bytes from the input,\n // which means we're done copying.\n break;\n }\n\n let _res = splice(\n rd,\n None,\n stdout.as_raw_fd(),\n None,\n BUF_SIZE,\n SpliceFFlags::empty(),\n ).unwrap();\n }\n }\n}\n\nSo, how fast is this?\n\nfcat myfile | pv -r > /dev/null\n[5.90GiB/s]\n\nHoly guacamole. That's over three times as fast as system cat.\nOperating System support\n\nLinux and Android are fully supported.\nOpenBSD\nalso has some sort of splice implementation called\nsosplice. I haven't tested that, though.\nOn macOS, the closest thing to splice is its bigger brother,\nsendfile, which can send a\nfile to a socket within the Kernel. Unfortunately, it does not support sending\nfrom file to file.2 There's also\ncopyfile,\nwhich has a similar interface, but unfortunately, it is not zero-copy. 
(I\nthought so in the beginning, but I was\nwrong.)\nWindows doesn't provide zero-copy file-to-file transfer\n(only file-to-socket transfer using the TransmitFile API).\n\nNevertheless, in a production-grade\nimplementation, the splice support could be activated on systems that support\nit, while using a generic implementation as a fallback.\nNice, but why on earth would I want that?\nI have no idea. Probably you don't, because your bottleneck is somewhere else.\nThat said, many people use cat for piping data into another process like\n\n# Count all lines in C files\ncat *.c | wc -l\n\nor\n\ncat kittens.txt | grep "dog"\n\nIn this case, if you notice that cat is the bottleneck try fcat (but first,\ntry to avoid cat altogether).\nWith some more work, fcat could also be used to directly route packets from one\nnetwork card to another, similar to netcat. \nLessons learned\n\nThe closer we get to bare metal, the more our hard-won abstractions fall\napart, and we are back to low-level systems programming.\nApart from a fast cat, there's also a use-case for a slow cat: old computers.\nFor that purpose, there's... well.. slowcat.\n\nThat said, I still have no idea why GNU cat does not use splice on Linux. 🤔\nThe source code for fcat is on Github.\nContributions welcome!\nThanks to Olaf Gladis for helping me run the benchmarks on his Linux machine and to Patrick Pokatilo and Simon Brüggen for reviewing drafts of the article.\nFootnotes\n1. Thanks to reader Freeky for making this code more idiomatic.↩\n2. Thanks to reader masklinn for the hint.↩ \n" + "body": " Lots of people asked me to write another piece about the internals of well-known Unix commands. Well, actually, nobody asked, but it makes for a good intro. I’m sure you’ve read the previous parts about yes and ls — they are epic. Anyway, today we talk about cat, which is used to concatenate files - or, more commonly, abused to print a file’s contents to the screen. 
# Concatenate files, the intended purpose cat input1.txt input2.txt input3.txt &amp;gt; output.txt # Print file to screen, the most common use-case cat myfile Implementing cat Here’s a naive cat in Ruby: #!&#x2F;usr&#x2F;bin&#x2F;env ruby def cat(args) args.each do |arg| IO.foreach(arg) do |line| puts line end end end cat(ARGV) This program goes through each file and prints its contents line by line. Easy peasy! But wait, how fast is this tool? I quickly created a random 2 GB file for the benchmark. Let’s compare the speed of our naive implementation with the system one using the awesome pv (Pipe Viewer) tool. All tests are averaged over five runs on a warm cache (file in memory). # Ruby 2.5.1 &amp;gt; .&#x2F;rubycat myfile | pv -r &amp;gt; &#x2F;dev&#x2F;null [196MiB&#x2F;s] Not bad, I guess? How does it compare with my system’s cat? cat myfile | pv -r &amp;gt; &#x2F;dev&#x2F;null [1.90GiB&#x2F;s] Uh oh, GNU cat is ten times faster than our little Ruby cat. 💎🐈🐌 Making our Ruby cat a little faster Our naive Ruby code can be tweaked a bit. Turns out line buffering hurts performance in the end1: #!&#x2F;usr&#x2F;bin&#x2F;env ruby def cat(args) args.each do |arg| IO.copy_stream(arg, STDOUT) end end cat(ARGV) rubycat myfile | pv -r &amp;gt; &#x2F;dev&#x2F;null [1.81GiB&#x2F;s] Wow… we didn’t really try hard, and we’re already approaching the speed of a tool that gets optimized since 1971. 🎉 But before we celebrate too much, let’s see if we can go even faster. Splice What initially motivated me to write about cat was this comment by user wahern on Hacker News: I’m surprised that neither GNU yes nor GNU cat uses splice(2). Could this splice thing make printing files even faster? — I was intrigued. Splice was first introduced to the Linux Kernel in 2006, and there is a nice summary from Linus Torvalds himself, but I prefer the description from the manpage: splice() moves data between two file descriptors without copying between kernel address space and user address space. 
It transfers up to len bytes of data from the file descriptor fd_in to the file descriptor fd_out, where one of the file descriptors must refer to a pipe. If you really want to dig deeper, here’s the corresponding source code from the Linux Kernel, but we don’t need to know all the nitty-gritty details for now. Instead, we can just inspect the header from the C implementation: #include &amp;lt;fcntl.h&amp;gt; ssize_t splice (int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags); To break it down even more, here’s how we would copy the entire src file to dst: const ssize_t r = splice (src, NULL, dst, NULL, size, 0); The cool thing about this is that all of it happens inside the Linux kernel, which means we won’t copy a single byte to userspace (where our program runs). Ideally, splice works by remapping pages and does not actually copy any data, which may improve I&#x2F;O performance (reference). Source: File icon by Aleksandr Vector from the Noun Project. Terminal icon by useiconic.com from the Noun Project. Using splice from Rust I have to say I’m not a C programmer and I prefer Rust because it offers a safer interface. Here’s the same thing in Rust: #[cfg(any(target_os = &amp;quot;linux&amp;quot;, target_os = &amp;quot;android&amp;quot;))] pub fn splice( fd_in: RawFd, off_in: Option&amp;lt;&amp;amp;mut libc::loff_t&amp;gt;, fd_out: RawFd, off_out: Option&amp;lt;&amp;amp;mut libc::loff_t&amp;gt;, len: usize, flags: SpliceFFlags, ) -&amp;gt; Result&amp;lt;usize&amp;gt; Now, I didn’t implement the Linux bindings myself. Instead, I just used a library called nix, which provides Rust friendly bindings to *nix APIs. There is one caveat, though: We cannot really copy the file directly to standard out, because splice requires one file descriptor to be a pipe. The way around that is to create a pipe, which consists of a reader and a writer (rd and wr). 
We pipe the file into the writer, and then we read from the pipe and push the data to stdout. You can see that I use a relatively big buffer of 16384 bytes (214) to improve performance. extern crate nix; use std::env; use std::fs::File; use std::io; use std::os::unix::io::AsRawFd; use nix::fcntl:: splice, SpliceFFlags ; use nix::unistd::pipe; const BUF_SIZE: usize = 16384; fn main() for path in env::args().skip(1) let input = File::open(&amp;amp;path).expect(&amp;amp;format!(&amp;quot;fcat: : No such file or directory&amp;quot;, path)); let (rd, wr) = pipe().unwrap(); let stdout = io::stdout(); let _handle = stdout.lock(); loop let res = splice( input.as_raw_fd(), None, wr, None, BUF_SIZE, SpliceFFlags::empty(), ).unwrap(); if res == 0 &#x2F;&#x2F; We read 0 bytes from the input, &#x2F;&#x2F; which means we&amp;#39;re done copying. break; let _res = splice( rd, None, stdout.as_raw_fd(), None, BUF_SIZE, SpliceFFlags::empty(), ).unwrap(); So, how fast is this? fcat myfile | pv -r &amp;gt; &#x2F;dev&#x2F;null [5.90GiB&#x2F;s] Holy guacamole. That’s over three times as fast as system cat. Operating System support Linux and Android are fully supported. OpenBSD also has some sort of splice implementation called sosplice. I didn’t test that, though. On macOS, the closest thing to splice is its bigger brother, sendfile, which can send a file to a socket within the Kernel. Unfortunately, it does not support sending from file to file.2 There’s also copyfile, which has a similar interface, but unfortunately, it is not zero-copy. (I thought so in the beginning, but I was wrong.) Windows doesn’t provide zero-copy file-to-file transfer (only file-to-socket transfer using the TransmitFile API). Nevertheless, in a production-grade implementation, the splice support could be activated on systems that support it, while using a generic implementation as a fallback. Nice, but why on earth would I want that? I have no idea. Probably you don’t, because your bottleneck is somewhere else. 
That said, many people use cat for piping data into another process like # Count all lines in C files cat *.c | wc -l or cat kittens.txt | grep &amp;quot;dog&amp;quot; In this case, if you notice that cat is the bottleneck try fcat (but first, try to avoid cat altogether). With some more work, fcat could also be used to directly route packets from one network card to another, similar to netcat. Lessons learned The closer we get to bare metal, the more our hard-won abstractions fall apart, and we are back to low-level systems programming. Apart from a fast cat, there’s also a use-case for a slow cat: old computers. For that purpose, there’s — you guessed it — slowcat. That said, I still have no idea why GNU cat does not use splice on Linux. 🤔 The source code for fcat is on Github. Contributions welcome! Footnotes 1. Thanks to reader Freeky for making this code more idiomatic.↩ 2. Thanks to reader masklinn for the hint.↩ " }, { "title": "That Octocat on the Wall", "url": "https://endler.dev/2018/github/", - "body": "\nSo I'm in a bit of a sentimental mood lately.\nGithub got acquired by Microsoft.\nWhile I think the acquisition was well-deserved, I still wish it didn't happen.\nLet me explain.\nMy early days\nI joined Github on 3rd of January 2010.\nSince I was a bit late to the game, my usual handle (mre) was already taken.\nSo I naively sent a mail to Github, asking if I could bag the name as it seemed to be abandoned.\nTo my surprise, I got an answer.\nThe response came from a guy named Chris Wanstrath.\nAll he wrote was "it's yours." 
\nThat was the moment I fell in love with Github.\nI felt encouraged to collaborate on projects, that everybody could contribute something valuable.\nOnly later I found out that Chris was one of the founders and the CEO of the company.\nLiving on Github\nBefore Github, there was SourceForge, and I only went there to download binaries.\nGithub showed me, that there was an entire community of like-minded people\nout there who think alike and love to work on code in their free-time.\nTo me, Github is much more than a git interface; it's a social network.\nWhile other people browse Facebook or Instagram, I browse Github.\nI can still vividly remember getting my first star and my first issue on one of my projects coming from a real (!) person other than myself.\nAfter so many years, a pull-request still feels like the most personal gift anyone could give to me.\nGithub - the culture\nAfter a while, I started to admire some Github employees deeply:\n\nZach Holman (who is about my age) is a great writer, speaker, and one of the most creative developers I can think of.\nScott Chacon, who taught me a lot about git and whose presentation tool, showoff, I've used at University.\nTom Preston-Werner, who I admire for refusing an offer from Microsoft to pursue his dream and build Github, for establishing a super-nerdy company culture, and for Jekyll.\n\nAll three developers have since left the company.\nI can't help but notice that Github has changed.\nThe harassment accusations and letting Zach Holman go are only part of the story.\nIt has become a company like any other, maintaining a mature product.\nIt doesn't excite me anymore.\nAn alternative reality\nThere's still a bitter taste in my mouth when I think that Github has fallen prey to one of the tech giants. 
I loved Github while it was a small, friendly community of passionate developers.\nCould this have been sustainable?\nMaybe through paid features for project maintainers.\nYou see, if you do Open Source every day, it can be a lot of work.\nPeople start depending on your projects, and you feel responsible for keeping the lights on.\nTo ease the burden, I'd love to have deeper insights into my project usage: visitor statistics for longer than two weeks,\na front page where you could filter and search for events, a better way to handle discussions\n(which can get out of hand quickly), better CI integration à la Gitlab.\nThese features would be targeted at the top 10% of Github users, a group of 3 million people.\nWould this be enough to pay the bills? Probably. Would it be enough to grow? Probably not.\nSo what?\nI don't think the acquisition will kill the culture. Microsoft is a strong partner and Nat Friedman is one of us.\nOn the other side, I'm not as enthusiastic as I used to be.\nThere's room for competitors now and I'm beginning to wonder what will be the next Github.\nThat said, I will keep the Octocat on my office wall, in the hope that the excitement comes back.\n" + "body": " Photo of my office with Github’s octocat on the wall over my couch So I’m in a bit of a sentimental mood lately. Github got acquired by Microsoft. While I think the acquisition was well-deserved, I still wish it didn’t happen. Let me explain. My early days I joined Github on 3rd of January 2010. Since I was a bit late to the game, my usual handle (mre) was already taken. So I naively sent a mail to Github, asking if I could bag the name as it seemed to be abandoned. To my surprise, I got an answer. The response came from a guy named Chris Wanstrath. All he wrote was “it’s yours.” That was the moment I fell in love with Github. I felt encouraged to collaborate on projects, that everybody could contribute something valuable. 
Only later I found out that Chris was one of the founders and the CEO of the company. Living on Github Before Github, there was SourceForge, and I only went there to download binaries. Github showed me, that there was an entire community of like-minded people out there, who ❤️ to work on code in their free-time. To me, Github is much more than a git interface; it’s a social network. While other people browse Facebook or Instagram, I browse Github. I can still vividly remember getting my first star and my first issue on one of my projects coming from a real (!) person other than myself. After so many years, a pull-request still feels like the most personal gift anyone could give to me. Github - the culture After a while, I started to admire some Github employees deeply: Zach Holman (who is about my age) is a great writer, speaker, and one of the most creative developers I can think of. Scott Chacon, who taught me a lot about git and whose presentation tool, showoff, I’ve used at University. Tom Preston-Werner, who I admire for refusing an offer from Microsoft to pursue his dream and build Github, for establishing a super-nerdy company culture, and for Jekyll. All three developers have since left the company. I can’t help but notice that Github has changed. The harassment accusations and letting Zach Holman go are only part of the story. It has become a company like any other, maintaining a mature product. It doesn’t excite me anymore. An alternative reality There’s still a bitter taste in my mouth when I think that Github has fallen prey to one of the tech giants. I loved Github while it was a small, friendly community of passionate developers. Could this have been sustainable? Maybe through paid features for project maintainers. You see, if you do Open Source every day, it can be a lot of work. People start depending on your projects, and you feel responsible for keeping the lights on. 
To ease the burden, I’d love to have deeper insights into my project usage: visitor statistics for longer than two weeks, a front page where you could filter and search for events, a better way to handle discussions (which can get out of hand quickly), better CI integration à la Gitlab. These features would be targeted at the top 10% of Github users, a group of 3 million people. Would this be enough to pay the bills? Probably. Would it be enough to grow? Probably not. So what? I don’t think the acquisition will kill the culture. Microsoft is a strong partner and Nat Friedman is one of us. On the other side, I’m not as enthusiastic as I used to be. There’s room for competitors now and I’m beginning to wonder what will be the next Github. That said, I will keep the Octocat on my office wall, in the hope that the excitement comes back. " }, { "title": "Ten Years of Vim", "url": "https://endler.dev/2018/ten-years-of-vim/", - "body": "\n\n\nWhen I opened Vim by accident for the first time, I thought it was broken. My\nkeystrokes changed the screen in unpredictable ways, and I wanted to undo things\nand quit. Needless to say, it was an unpleasant experience. There was something\nabout it though, that kept me coming back and it became my main editor.\nFast forward ten years (!) and I still use Vim.\nAfter all the Textmates and Atoms and PhpStorms I tried, I still find myself at home in Vim.\nPeople keep asking me: Why is that?\nWhy Vim?\nBefore Vim, I had used many other editors like notepad or nano. They all behaved more or less as expected: you insert text, you move your cursor with the arrow keys or your mouse, and you save with Control + S or by using the menu bar. VI (and Vim, its spiritual successor) is different.\nEVERYTHING in Vim is different, and that's why it's so highly effective. 
Let me explain.\nThe Zen of Vim\nThe philosophy behind Vim takes a while to sink in: \nWhile other editors focus on writing as the central part of working with text, Vim thinks it's editing.\nYou see, most of the time I don't spend writing new text; instead, I edit existing text.\nI mold text, form it, turn it upside down.\nWriting text is craftsmanship and hard work. You have to shape your thoughts with your cold, bare hands until they somewhat form a coherent whole.\nThis painful process is what Vim tries to make at least bearable. It helps you keep control.\nIt does that, by providing you sharp, effective tools to modify text.\nThe core of Vim is a language for editing text.\nVim, the language\nThe Vim commands are not cryptic, you already know them.\n\nTo undo, type u.\nTo find the next t, type ft.\nTo delete a word, type daw.\nTo change a sentence, type cas.\n\nMore often than not, you can guess the correct command by thinking of an operation you want to execute and an object to execute it on.\nThen just take the first character of every word. Try it!\nIf anything goes wrong, you can always hit ESC and type u for undo.\nOperations: delete, find, change, back, insert, append,...\nObjects: word, sentence, parentheses, (html) tag,... (see :help text-objects)\nInserting text is just another editing operation, which can be triggered with i. \nThat's why, by default, you are in normal mode — also called command mode — where all those operations work. 
\nOnce you know this, Vim makes a lot more sense, and that's when you start to be productive.\nHow my workflow changed over the years\nWhen I was a beginner, I was very interested in how people with more Vim experience would use the editor.\nNow that I'm a long-time user, here's my answer: there's no secret sauce.\nI certainly feel less exhausted after editing text for a day, but 90% of the commands I use fit on a post-it note.\nThat said, throughout the years, my Vim habits changed.\nI went through several phases:\nYear 1: I'm happy if I can insert text and quit again.\nYear 2: That's cool, let's learn more shortcuts.\nYear 3-5: Let's add all the features!!!\nYear 6-10: My .vimrc is five lines long.\nYear three is when I started to learn the Vim ecosystem for real.\nI tried all sorts of flavors like MacVim and distributions like janus.\nFor a while, I even maintained my own Vim configuration\n, which was almost 400 lines long.\nAll of that certainly helped me learn what's out there, but I'm not sure if I would recommend that to a Vim beginner.\nAfter all, you don't really need all of that. Start with a vanilla Vim editor which works just fine!\nMy current Vim setup is pretty minimalistic. I don't use plugins anymore, mostly out of laziness and because built-in Vim commands or macros can replace them.\nHere are three concrete examples of how my workflow changed over the years:\n\n\nIn the beginning, I used a lot of "number powered movements". That is, if you have a command like b, which goes back one word in the text, you can also say 5b to go back five words. Nowadays I mostly use / to move to a matching word because it's quicker.\n\n\nI don't use arrow keys to move around in text anymore but forced myself to use h, j, k, l. Many people say that this is faster. After trying this for a few years, I don't think that is true (at least for me). 
I now just stick to it out of habit.\n\n\nOn my main working machine I use Vim for quick text editing and Visual Studio Code plus the awesome Vim plugin for projects. This way, I get the best of both worlds.\n\n\nWorkflow issues I still struggle with\nAfter all these years I'm still not a Vim master — far from it. \nAs every other Vim user will tell you, we're all still learning. \nHere are a few things I wish I could do better:\n\nJumping around in longer texts: I know the basics, like searching (/), jumping to a matching bracket (%) or jumping to specific lines (for line 10, type 10G), but I still could use symbols more often for navigation.\nUsing visual mode for moving text around: Sometimes it can be quite complicated to type the right combination of letters to cut (delete) the text I want to move around. That's where visual mode (v) shines. It highlights the selected text. I should use it more often.\nMultiple registers for copy and paste: Right now I only use one register (like a pastebin) for copying text, but Vim supports multiple registers. That's cool if you want to move around more than one thing at the same time. Let's use more of those!\nTabs: I know how tabs work, but all the typing feels clunky. That's why I never extensively used them. Instead, I mostly use multiple terminal tabs or an IDE with Vim bindings for bigger projects.\n\nWould I learn Vim again?\nThat's a tough question to answer.\nOn one side, I would say no. \nThere's a steep learning curve in Vim and seeing all those modern IDEs become better at understanding the user's intent, editing text became way easier and faster in general.\nOn the other side, Vim is the fastest way for me to write down my thoughts and code. As a bonus, it runs on every machine and might well be around in decades to come. 
In contrast, I don't know if the IntelliJ shortcuts will be relevant in ten years (note: if you read this in the future and ask yourself "What is IntelliJ?", the answer might be no).\nTakeaways\nIf I can give you one tip, don't learn Vim by memorizing commands. Instead, look at your current workflow and try to make it better, then see how Vim can make that easier. It helps to look at other people using Vim to get inspired (Youtube link with sound).\nYou will spend a lot of time writing text, so it's well worth the time investment to learn one editor really well — especially if you are a programmer.\nAfter ten years, Vim is somehow ingrained in my mind. I think Vim when I'm editing text. It has become yet another natural language to me. I'm looking forward to the next ten years.\n" + "body": " When I opened Vim by accident for the first time, I thought it was broken. My keystrokes changed the screen in unpredictable ways, and I wanted to undo things and quit. Needless to say, it was an unpleasant experience. There was something about it though, that kept me coming back and it became my main editor. Fast forward ten years (!) and I still use Vim. After all the Textmates and Atoms and PhpStorms I tried, I still find myself at home in Vim. People keep asking me: Why is that? Why Vim? Before Vim, I had used many other editors like notepad or nano. They all behaved more or less as expected: you insert text, you move your cursor with the arrow keys or your mouse, and you save with Control + S or by using the menu bar. VI (and Vim, its spiritual successor) is different. EVERYTHING in Vim is different, and that’s why it’s so highly effective. Let me explain. The Zen of Vim The philosophy behind Vim takes a while to sink in: While other editors focus on writing as the central part of working with text, Vim thinks it’s editing. You see, most of the time I don’t spend writing new text; instead, I edit existing text. I mold text, form it, turn it upside down. 
Writing text is craftsmanship and hard work. You have to shape your thoughts with your cold, bare hands until they somewhat form a coherent whole. This painful process is what Vim tries to make at least bearable. It helps you keep control. It does that, by providing you sharp, effective tools to modify text. The core of Vim is a language for editing text. Vim, The Language The Vim commands are not cryptic, you already know them. To undo, type u. To find the next t, type ft. To delete a word, type daw. To change a sentence, type cas. More often than not, you can guess the correct command by thinking of an operation you want to execute and an object to execute it on. Then just take the first character of every word. Try it! If anything goes wrong, you can always hit ESC and type u for undo. Operations: delete, find, change, back, insert, append,… Objects: word, sentence, parentheses, (html) tag,… (see :help text-objects) Inserting text is just another editing operation, which can be triggered with i. That’s why, by default, you are in normal mode — also called command mode — where all those operations work. Once you know this, Vim makes a lot more sense, and that’s when you start to be productive. How My Workflow Changed Over The Years When I was a beginner, I was very interested in how people with more Vim experience would use the editor. Now that I’m a long-time user, here’s my answer: there’s no secret sauce. I certainly feel less exhausted after editing text for a day, but 90% of the commands I use fit on a post-it note. That said, throughout the years, my Vim habits changed. I went through several phases: Year 1: I’m happy if I can insert text and quit again. Year 2: That’s cool, let’s learn more shortcuts. Year 3-5: Let’s add all the features!!! Year 6-10: My .vimrc is five lines long. Year three is when I started to learn the Vim ecosystem for real. I tried all sorts of flavors like MacVim and distributions like janus. 
For a while, I even maintained my own Vim configuration , which was almost 400 lines long. All of that certainly helped me learn what’s out there, but I’m not sure if I would recommend that to a Vim beginner. After all, you don’t really need all of that. Start with a vanilla Vim editor which works just fine! My current Vim setup is pretty minimalistic. I don’t use plugins anymore, mostly out of laziness and because built-in Vim commands or macros can replace them. Here are three concrete examples of how my workflow changed over the years: In the beginning, I used a lot of “number powered movements”. That is, if you have a command like b, which goes back one word in the text, you can also say 5b to go back five words. Nowadays I mostly use &#x2F; to move to a matching word because it’s quicker. I don’t use arrow keys to move around in text anymore but forced myself to use h, j, k, l. Many people say that this is faster. After trying this for a few years, I don’t think that is true (at least for me). I now just stick to it out of habit. On my main working machine I use Vim for quick text editing and Visual Studio Code plus the awesome Vim plugin for projects. This way, I get the best of both worlds. Workflow Issues I Still Struggle With After all these years I’m still not a Vim master — far from it. As every other Vim user will tell you, we’re all still learning. Here are a few things I wish I could do better: Jumping around in longer texts: I know the basics, like searching (&#x2F;), jumping to a matching bracket (%) or jumping to specific lines (for line 10, type 10G), but I still could use symbols more often for navigation. Using visual mode for moving text around: Sometimes it can be quite complicated to type the right combination of letters to cut (delete) the text I want to move around. That’s where visual mode (v) shines. It highlights the selected text. I should use it more often. 
Multiple registers for copy and paste: Right now I only use one register (like a pastebin) for copying text, but Vim supports multiple registers. That’s cool if you want to move around more than one thing at the same time. Let’s use more of those! Tabs: I know how tabs work, but all the typing feels clunky. That’s why I never extensively used them. Instead, I mostly use multiple terminal tabs or an IDE with Vim bindings for bigger projects. Would I learn Vim again? That’s a tough question to answer. On one side, I would say no. There’s a steep learning curve in Vim and seeing all those modern IDEs become better at understanding the user’s intent, editing text became way easier and faster in general. On the other side, Vim is the fastest way for me to write down my thoughts and code. As a bonus, it runs on every machine and might well be around for decades to come. In contrast, I don’t know if the IntelliJ shortcuts will be relevant in ten years (note: if you read this in the future and ask yourself “What is IntelliJ?”, the answer might be no). Takeaways If I can give you one tip, don’t learn Vim by memorizing commands. Instead, look at your current workflow and try to make it better, then see how Vim can make that easier. It helps to look at other people using Vim to get inspired (Youtube link with sound). You will spend a lot of time writing text, so it’s well worth the time investment to learn one editor really well — especially if you are a programmer. After ten years, Vim is somehow ingrained in my mind. I think Vim when I’m editing text. It has become yet another natural language to me. I’m looking forward to the next ten years. 
" }, { "title": "Refactoring Go Code to Avoid File I/O in Unit Tests", "url": "https://endler.dev/2018/go-io-testing/", - "body": "At work today, I refactored some simple Go code to make it more testable.\nThe idea was to avoid file handling in unit tests without mocking or using temporary files by separating data input/output and data manipulation.\n\n\n \n \n A gopher reading a long computer printout\n \n Illustration by Marcus Olsson CC BY-NC-SA 4.0\n \n\nI was surprised that I couldn't find a simple explanation on sites like StackOverflow,\nwhich is why I wrote down some notes myself so that others can refer to it in the future.\nOur example code\nThe initial version looked like this:\n\npackage main\n\nimport (\n\t"bufio"\n\t"io/ioutil"\n\t"os"\n)\n\nfunc main() {\n\tanalyze("test.txt")\n}\n\nfunc analyze(file string) error {\n\thandle, err := os.Open(file)\n\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer handle.Close()\n\n\tscanner := bufio.NewScanner(handle)\n\tfor scanner.Scan() {\n\t\t// Do something with line\n\t\t_ = scanner.Text()\n\t}\n\treturn nil\n}\n\nAs you can see, we take a filename as input, and we open that file inside the analyze function to do something with its contents.\nWriting our first test for the code\nA typical test harness for that code might look like this:\n\npackage main\n\nimport "testing"\n\nfunc Test_analyze(t *testing.T) {\n\tt.Run("Test something", func(t *testing.T) {\n\t\tif err := analyze("test.txt"); (err != nil) != false {\n\t\t\tt.Errorf("analyze() error = %v", err)\n\t\t}\n\t})\n}\n\nAll fine and good?\nProblems\nThis will work, but file I/O while running tests is not always the best idea.\nFor one, you could be running in a constrained environment, where you don't have access to the file.\nWe could use temporary files to avoid this.\nBut there might be problems with disk I/O, which makes for flaky tests and frustration.\nAnother process could also modify the file during the test.\nAll these issues have nothing to do 
with your code.\nFurthermore, it's not enough to just look at the test and see exactly what's going on. You also have to read the text file first.\nA lot of people suggest mocking instead. \nThere are quite a few powerful libraries like spf13/afero for this purpose.\nThese packages will create temporary files in the background and clean up afterward.\nIn my opinion, mocking should be the last resort when it comes to testing. Before you mock, check that you use the right abstractions in your code.\nMaybe implementing against an interface or using Dependency Injection helps decouple components?\nMore often than not, a clear separation of concerns is all you need.\nRefactoring to make testing easier\nIn my case above, we can easily avoid using mocks and temporary files by decoupling file I/O from the analysis.\nWe do so by refactoring our analyze function to call doSomething, which takes an io.Reader.\n(You could also use an array of strings for now.)\nOur main.go now looks like this:\n\npackage main\n\nimport (\n\t"bufio"\n\t"io"\n\t"os"\n)\n\nfunc main() {\n\tanalyze("test.txt")\n}\n\nfunc analyze(file string) error {\n\thandle, err := os.Open(file)\n\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer handle.Close()\n\treturn doSomething(handle)\n}\n\nfunc doSomething(handle io.Reader) error {\n\tscanner := bufio.NewScanner(handle)\n\tfor scanner.Scan() {\n\t\t// Do something with line\n\t\t_ = scanner.Text()\n\t}\n\treturn nil\n}\n\nNow we can test the actual analysis in isolation:\n\npackage main\n\nimport (\n\t"strings"\n\t"testing"\n)\n\nfunc Test_analyze(t *testing.T) {\n\tt.Run("Test something", func(t *testing.T) {\n\t\tif err := doSomething(strings.NewReader("This is a test string")); (err != nil) != false {\n\t\t\tt.Errorf("analyze() error = %v", err)\n\t\t}\n\t})\n}\n\nWe changed analyze("test.txt") to doSomething(strings.NewReader("This is a test string")).\n(Of course, we should also write a separate test for analyze(), but the focus is on decoupling the 
datasource-agnostic part here.)\nResult\nBy slightly refactoring our code, we gained the following advantages:\n\nSimple testability: No mocks or temporary files.\nSeparation of concerns: Each function does exactly one thing.\nEasier code re-use: The doSomething() function will work with any io.Reader and can be called from other places. We can even move it to its own library if we want.\n\nOn Reddit, user soapysops made an important remark:\n\nIn general, I prefer to not accept a file name in an API. A file name doesn't give users enough control. It doesn't let you use an unusual encoding, special file permissions, or a bytes.Buffer instead of an actual file, for example. Accepting a file name adds a huge dependency to the code: the file system, along with all of its associated OS specific stuff.\nSo I probably would have eliminated the file name based API and only exposed one based on io.Reader. That way, you have complete code coverage, fast tests, and far fewer edge cases to worry about.\n\nI totally agree with that sentiment.\nBut often times you can't simply change the user-facing API easily, because the API might be public and might already have users. \nThe refactoring above is just the first step towards better architecture. There is definitely a lot more you can do.\nIf that got you interested, also check out justforfunc #29: dependency injection in a code review, which talks about the same topic.\n" + "body": "At work today, I refactored some simple Go code to make it more testable. The idea was to avoid file handling in unit tests without mocking or using temporary files by separating data input&#x2F;output and data manipulation. A gopher reading a long computer printout Source: Illustration by Marcus Olsson CC BY-NC-SA 4.0 I was surprised that I couldn’t find a simple explanation on sites like StackOverflow, which is why I wrote down some notes myself so that others can refer to it in the future. 
Our example code The initial version looked like this: package main import ( &amp;quot;bufio&amp;quot; &amp;quot;io&#x2F;ioutil&amp;quot; &amp;quot;os&amp;quot; ) func main() analyze(&amp;quot;test.txt&amp;quot;) func analyze(file string) error handle, err := os.Open(file) if err != nil return err defer handle.Close() scanner := bufio.NewScanner(handle) for scanner.Scan() &#x2F;&#x2F; Do something with line _ = scanner.Text() return nil As you can see, we take a filename as input, and we open that file inside the analyze function to do something with its contents. Writing our first test for the code A typical test harness for that code might look like this: package main import &amp;quot;testing&amp;quot; func Test_analyze(t *testing.T) t.Run(&amp;quot;Test something&amp;quot;, func(t *testing.T) if err := analyze(&amp;quot;test.txt&amp;quot;); (err != nil) != false t.Errorf(&amp;quot;analyze() error = %v&amp;quot;, err) ) All fine and good? Problems This will work, but file I&#x2F;O while running tests is not always the best idea. For one, you could be running in a constrained environment, where you don’t have access to the file. We could use temporary files to avoid this. But there might be problems with disk I&#x2F;O, which makes for flaky tests and frustration. Another process could also modify the file during the test. All these issues have nothing to do with your code. Furthermore, it’s not enough to just look at the test and see exactly what’s going on. You also have to read the text file first. A lot of people suggest mocking instead. There are quite a few powerful libraries like spf13&#x2F;afero for this purpose. These packages will create temporary files in the background and clean up afterward. In my opinion, mocking should be the last resort when it comes to testing. Before you mock, check that you use the right abstractions in your code. Maybe implementing against an interface or using Dependency Injection helps decouple components? 
More often than not, a clear separation of concerns is all you need. Refactoring to make testing easier In my case above, we can easily avoid using mocks and temporary files by decoupling file I&#x2F;O from the analysis. We do so by refactoring our analyze function to call doSomething, which takes an io.Reader. (You could also use an array of strings for now.) Our main.go now looks like this: package main import ( &amp;quot;bufio&amp;quot; &amp;quot;io&amp;quot; &amp;quot;os&amp;quot; ) func main() analyze(&amp;quot;test.txt&amp;quot;) func analyze(file string) error handle, err := os.Open(file) if err != nil return err defer handle.Close() return doSomething(handle) func doSomething(handle io.Reader) error scanner := bufio.NewScanner(handle) for scanner.Scan() &#x2F;&#x2F; Do something with line _ = scanner.Text() return nil Now we can test the actual analysis in isolation: package main import ( &amp;quot;strings&amp;quot; &amp;quot;testing&amp;quot; ) func Test_analyze(t *testing.T) t.Run(&amp;quot;Test something&amp;quot;, func(t *testing.T) if err := doSomething(strings.NewReader(&amp;quot;This is a test string&amp;quot;)); (err != nil) != false t.Errorf(&amp;quot;analyze() error = %v&amp;quot;, err) ) We changed analyze( test.txt ) to doSomething(strings.NewReader( This is a test string )). (Of course, we should also write a separate test for analyze(), but the focus is on decoupling the datasource-agnostic part here.) Result By slightly refactoring our code, we gained the following advantages: Simple testability: No mocks or temporary files. Separation of concerns: Each function does exactly one thing. Easier code re-use: The doSomething() function will work with any io.Reader and can be called from other places. We can even move it to its own library if we want. On Reddit, user soapysops made an important remark: In general, I prefer to not accept a file name in an API. A file name doesn’t give users enough control. 
It doesn’t let you use an unusual encoding, special file permissions, or a bytes.Buffer instead of an actual file, for example. Accepting a file name adds a huge dependency to the code: the file system, along with all of its associated OS specific stuff. So I probably would have eliminated the file name based API and only exposed one based on io.Reader. That way, you have complete code coverage, fast tests, and far fewer edge cases to worry about. I totally agree with that sentiment. But often times you can’t simply change the user-facing API easily, because the API might be public and might already have users. The refactoring above is just the first step towards better architecture. There is definitely a lot more you can do to start writing robust, well-tested systems in Go. More Resources If that got you interested, also check out justforfunc #29: dependency injection in a code review, which covers the same topic: document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); A great resource that I can recommend is Learn Go with Tests. It teaches you test-driven development with Go and helps you get a grounding with TDD. Another one is The Go Programming Language book, co-authored by Brian W. Kernighan (of Unix fame), which shows how to write clear and idiomatic Go to solve real-world problems. It contains a dedicated chapter on interfaces and testing. It also covers io.Reader in more detail. The Go Programming Language book co-authored by Brian W. 
Kernighan (affiliate link) " }, { "title": "A Tiny `ls` Clone Written in Rust", "url": "https://endler.dev/2018/ls/", - "body": "In my series of useless Unix tools rewritten in Rust, today I'm going to be covering one of my all-time favorites: ls.\nFirst off, let me say that you probably don't want to use this code as a replacement for ls on your local machine (although you could!).\nAs we will find out, ls is actually quite a powerful tool under the hood.\nI'm not going to come up with a full rewrite, but instead only cover the very basic output that you would expect from calling ls -l on your command line.\nWhat is this output? I'm glad you asked.\nExpected output\n\n> ls -l\ndrwxr-xr-x 2 mendler staff 13468 Feb 4 11:19 Top Secret\n-rwxr--r-- 1 mendler staff 6323935 Mar 8 21:56 Never Gonna Give You Up - Rick Astley.mp3\n-rw-r--r-- 1 mendler staff 0 Feb 18 23:55 Thoughts on Chess Boxing.doc\n-rw-r--r-- 1 mendler staff 380434 Dec 24 16:00 nobel-prize-speech.txt\n\nYour output may vary, but generally, there are a couple of notable things going on. From left to right, we've got the following fields:\n\nThe drwx things in the beginning are the file permissions (also called the file mode). If d is set, it's a directory. r means read, w means write and x execute.\nThis rwx pattern gets repeated three times for the current user, the group, and other computer users respectively.\nNext we got the hardlink count when referring to a file, or the number of contained directory entries when referring to a directory. (Reference)\nOwner name\nGroup name\nNumber of bytes in the file\nDate when the file was last modified\nFinally, the path name\n\nFor more in-depth information, I can recommend reading the manpage of ls from the GNU coreutils used in most Linux distributions and the one from Darwin (which powers MacOS). \nWhew, that's a lot of information for such a tiny tool.\nBut then again, it can't be so hard to port that to Rust, right? 
Let's get started!\nA very basic ls in Rust\nHere is the most bare-bones version of ls, which just prints all files in the current directory:\n\nuse std::fs;\nuse std::path::Path;\nuse std::error::Error;\nuse std::process;\n\nfn main() {\n\tif let Err(ref e) = run(Path::new(".")) {\n\t\tprintln!("{}", e);\n\t\tprocess::exit(1);\n\t}\n}\n\nfn run(dir: &Path) -> Result<(), Box<Error>> {\n\tif dir.is_dir() {\n\t\tfor entry in fs::read_dir(dir)? {\n\t\t\t\tlet entry = entry?;\n\t\t\t\tlet file_name = entry\n\t\t\t\t\t\t.file_name()\n\t\t\t\t\t\t.into_string()\n\t\t\t\t\t\t.or_else(|f| Err(format!("Invalid entry: {:?}", f)))?;\n\t\t\t\tprintln!("{}", file_name);\n\t\t}\n\t}\n\tOk(())\n}\n\n\nWe can copy that straight out of the documentation.\nWhen we run it, we get the expected output:\n\n> cargo run\nCargo.lock\nCargo.toml\nsrc\ntarget\n\nIt prints the files and exits. Simple enough.\nWe should stop for a moment and celebrate our success, knowing that we just wrote our first little Unix utility from scratch.\nPro Tip: You can install the binary with cargo install and call it like any other binary from now on.\nBut we have higher goals, so let's continue.\nAdding a parameter to specify the directory\nUsually, if we type ls mydir, we expect to get the file listing of no other directory than mydir. We should add the same functionality to our version.\nTo do this, we need to accept command line parameters.\nOne Rust crate that I love to use in this case is structopt. It makes argument parsing very easy.\nAdd it to your Cargo.toml. 
(You need cargo-edit for the following command).\n\ncargo add structopt\n\nNow we can import it and use it in our project:\n\n#[macro_use]\nextern crate structopt;\n\n// use std::...\nuse structopt::StructOpt;\n\n#[derive(StructOpt, Debug)]\nstruct Opt {\n\t/// Output file\n\t#[structopt(default_value = ".", parse(from_os_str))]\n\tpath: PathBuf,\n}\n\nfn main() {\n\tlet opt = Opt::from_args();\n\tif let Err(ref e) = run(&opt.path) {\n\t\t\tprintln!("{}", e);\n\t\t\tprocess::exit(1);\n\t}\n}\n\nfn run(dir: &PathBuf) -> Result<(), Box<Error>> {\n\t// Same as before\n}\n\nBy adding the Opt struct, we can define the command line flags, input parameters, and the help output super easily.\nThere are tons of configuration options, so it's worth checking out the project homepage.\nAlso note, that we changed the type of the path variable from Path to PathBuf. The difference is, that PathBuf owns the inner path string, while Path simply provides a reference to it. The relationship is similar to String and &str.\nReading the modification time\nNow let's deal with the metadata.\nFirst, we try to retrieve the modification time from the file.\nA quick look at the documentation shows us how to do it:\n\nuse std::fs;\n\nlet metadata = fs::metadata("foo.txt")?;\n\nif let Ok(time) = metadata.modified() {\n\tprintln!("{:?}", time);\n}\n\nThe output might not be what you expect: we receive a SystemTime object, which represents the measurement of the system clock. E.g. 
this code\n\nprintln!("{:?}", SystemTime::now());\n// Prints: SystemTime { tv_sec: 1520554933, tv_nsec: 610406401 }\n\nBut the format that we would like to have is something like this:\n\nMar 9 01:24\n\nThankfully, there is a library called chrono, which can read this format and convert it into any human readable output we like:\n\nlet current: DateTime<Local> = DateTime::from(SystemTime::now());\nprintln!("{}", current.format("%_d %b %H:%M").to_string());\n\nthis prints\n\n9 Mar 01:29\n\n(Yeah, I know it's getting late.)\nArmed with that knowledge, we can now read our file modification time.\n\ncargo add chrono\n\nuse chrono::{DateTime, Local};\n\nfn run(dir: &PathBuf) -> Result<(), Box<Error>> {\n\tif dir.is_dir() {\n\t\tfor entry in fs::read_dir(dir)? {\n\t\t\tlet entry = entry?;\n\t\t\tlet file_name = ...\n\n\t\t\tlet metadata = entry.metadata()?;\n\t\t\tlet size = metadata.len();\n\t\t\tlet modified: DateTime<Local> = DateTime::from(metadata.modified()?);\n\n\t\t\tprintln!(\n\t\t\t\t"{:>5} {} {}",\n\t\t\t\tsize,\n\t\t\t\tmodified.format("%_d %b %H:%M").to_string(),\n\t\t\t\tfile_name\n\t\t\t);\n\t\t}\n\t}\n\tOk(())\n}\n\nThis {:>5} might look weird. It's a formatting directive provided by std::fmt.\nIt means "right align this field with a space padding of 5" - just like our bigger brother ls -l is doing it.\nSimilarly, we retrieved the size in bytes with metadata.len().\nUnix file permissions are a zoo\nReading the file permissions is a bit more tricky.\nWhile the rwx notation is very common in Unix derivatives such as *BSD or GNU/Linux, many other operating systems ship their own permission management.\nThere are even differences between the Unix derivatives.\nWikipedia lists a few extensions to the file permissions that you might encounter:\n\n+ (plus) suffix indicates an access control list that can control additional permissions.\n. (dot) suffix indicates an SELinux context is present. 
Details may be listed with the command ls -Z.\n@ suffix indicates extended file attributes are present.\n\nThat just goes to show, that there are a lot of important details to be considered when implementing this in real life.\nImplementing very basic file mode\nFor now, we just stick to the basics and assume we are on a platform that supports the rwx file mode.\nBehind the r, the w and the x are in reality octal numbers. That's easier for computers to work with and many hardcore users even prefer to type the numbers over the symbols.\nThe ruleset behind those octals is as follows. I took that from the chmod manpage.\n\n\tModes may be absolute or symbolic. \n\tAn absolute mode is an octal number constructed \n\tfrom the sum of one or more of the following values\n\n\t 0400 Allow read by owner.\n\t 0200 Allow write by owner.\n\t 0100 For files, allow execution by owner.\n\t 0040 Allow read by group members.\n\t 0020 Allow write by group members.\n\t 0010 For files, allow execution by group members.\n\t 0004 Allow read by others.\n\t 0002 Allow write by others.\n\t 0001 For files, allow execution by others.\n\nFor example, to set the permissions for a file so that the owner can read, write and execute it and nobody else can do anything would be 700 (400 + 200 +100).\nGranted, those numbers are the same since the 70s and are not going to change soon, but it's still a bad idea to compare our file permissions directly with the values; if not for compatibility reasons, then for readability and to avoid magic numbers in our code.\nTherefore, we use the libc crate, which provides constants for those magic numbers.\nAs mentioned above, these file permissions are Unix specific, so we need to import a Unix-only library named std::os::unix::fs::PermissionsExt; for that.\n\nextern crate libc;\n\n// Examples:\n// * `S_IRGRP` stands for "read permission for group",\n// * `S_IXUSR` stands for "execution permission for user"\nuse libc::{S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, 
S_IWUSR, S_IXGRP, S_IXOTH, S_IXUSR};\nuse std::os::unix::fs::PermissionsExt;\n\nWe can now get the file permissions like so:\n\nlet metadata = entry.metadata()?;\nlet mode = metadata.permissions().mode();\nparse_permissions(mode as u16);\n\nparse_permissions() is a little helper function defined as follows:\n\nfn parse_permissions(mode: u16) -> String {\n\tlet user = triplet(mode, S_IRUSR, S_IWUSR, S_IXUSR);\n\tlet group = triplet(mode, S_IRGRP, S_IWGRP, S_IXGRP);\n\tlet other = triplet(mode, S_IROTH, S_IWOTH, S_IXOTH);\n\t[user, group, other].join("")\n}\n\nIt takes the file mode as a u16 (simply because the libc constants are u16)\nand calls triplet on it.\nFor each flag read, write, and execute, it runs a binary & operation on mode.\nThe output is matched exhaustively against all possible permission patterns.\n\nfn triplet(mode: u16, read: u16, write: u16, execute: u16) -> String {\n\tmatch (mode & read, mode & write, mode & execute) {\n\t\t(0, 0, 0) => "---",\n\t\t(_, 0, 0) => "r--",\n\t\t(0, _, 0) => "-w-",\n\t\t(0, 0, _) => "--x",\n\t\t(_, 0, _) => "r-x",\n\t\t(_, _, 0) => "rw-",\n\t\t(0, _, _) => "-wx",\n\t\t(_, _, _) => "rwx",\n\t}.to_string()\n}\nWrapping up\nThe final output looks like this. Close enough.\n\n> cargo run\nrw-r--r-- 7 6 Mar 23:10 .gitignore\nrw-r--r-- 15618 8 Mar 00:41 Cargo.lock\nrw-r--r-- 185 8 Mar 00:41 Cargo.toml\nrwxr-xr-x 102 5 Mar 21:31 src\nrwxr-xr-x 136 6 Mar 23:07 target\n\nThat's it! You can find the final version of our toy ls on Github.\nWe are still far away from a full-fledged ls replacement, but at least we learned a thing or two about its internals.\nIf you're looking for a proper ls replacement written in Rust, go check out exa.\nIf, instead, you want to read another blog post from the same series, check out A Little Story About the yes Unix Command.\n" + "body": "In my series of useless Unix tools rewritten in Rust, today I’m going to be covering one of my all-time favorites: ls. 
First off, let me say that you probably don’t want to use this code as a replacement for ls on your local machine (although you could!). As we will find out, ls is actually quite a powerful tool under the hood. I’m not going to come up with a full rewrite, but instead only cover the very basic output that you would expect from calling ls -l on your command line. What is this output? I’m glad you asked. Expected output &amp;gt; ls -l drwxr-xr-x 2 mendler staff 13468 Feb 4 11:19 Top Secret -rwxr--r-- 1 mendler staff 6323935 Mar 8 21:56 Never Gonna Give You Up - Rick Astley.mp3 -rw-r--r-- 1 mendler staff 0 Feb 18 23:55 Thoughts on Chess Boxing.doc -rw-r--r-- 1 mendler staff 380434 Dec 24 16:00 nobel-prize-speech.txt Your output may vary, but generally, there are a couple of notable things going on. From left to right, we’ve got the following fields: The drwx things in the beginning are the file permissions (also called the file mode). If d is set, it’s a directory. r means read, w means write and x execute. This rwx pattern gets repeated three times for the current user, the group, and other computer users respectively. Next we got the hardlink count when referring to a file, or the number of contained directory entries when referring to a directory. (Reference) Owner name Group name Number of bytes in the file Date when the file was last modified Finally, the path name For more in-depth information, I can recommend reading the manpage of ls from the GNU coreutils used in most Linux distributions and the one from Darwin (which powers MacOS). Whew, that’s a lot of information for such a tiny tool. But then again, it can’t be so hard to port that to Rust, right? Let’s get started! 
A very basic ls in Rust Here is the most bare-bones version of ls, which just prints all files in the current directory: use std::fs; use std::path::Path; use std::error::Error; use std::process; fn main() if let Err(ref e) = run(Path::new(&amp;quot;.&amp;quot;)) println!(&amp;quot; &amp;quot;, e); process::exit(1); fn run(dir: &amp;amp;Path) -&amp;gt; Result&amp;lt;(), Box&amp;lt;Error&amp;gt;&amp;gt; if dir.is_dir() for entry in fs::read_dir(dir)? let entry = entry?; let file_name = entry .file_name() .into_string() .or_else(|f| Err(format!(&amp;quot;Invalid entry: :? &amp;quot;, f)))?; println!(&amp;quot; &amp;quot;, file_name); Ok(()) We can copy that straight out of the documentation. When we run it, we get the expected output: &amp;gt; cargo run Cargo.lock Cargo.toml src target It prints the files and exits. Simple enough. We should stop for a moment and celebrate our success, knowing that we just wrote our first little Unix utility from scratch. Pro Tip: You can install the binary with cargo install and call it like any other binary from now on. But we have higher goals, so let’s continue. Adding a parameter to specify the directory Usually, if we type ls mydir, we expect to get the file listing of no other directory than mydir. We should add the same functionality to our version. To do this, we need to accept command line parameters. One Rust crate that I love to use in this case is structopt. It makes argument parsing very easy. Add it to your Cargo.toml. (You need cargo-edit for the following command). cargo add structopt Now we can import it and use it in our project: #[macro_use] extern crate structopt; &#x2F;&#x2F; use std::... 
use structopt::StructOpt; #[derive(StructOpt, Debug)] struct Opt &#x2F;&#x2F;&#x2F; Output file #[structopt(default_value = &amp;quot;.&amp;quot;, parse(from_os_str))] path: PathBuf, fn main() let opt = Opt::from_args(); if let Err(ref e) = run(&amp;amp;opt.path) println!(&amp;quot; &amp;quot;, e); process::exit(1); fn run(dir: &amp;amp;PathBuf) -&amp;gt; Result&amp;lt;(), Box&amp;lt;Error&amp;gt;&amp;gt; &#x2F;&#x2F; Same as before By adding the Opt struct, we can define the command line flags, input parameters, and the help output super easily. There are tons of configuration options, so it’s worth checking out the project homepage. Also note, that we changed the type of the path variable from Path to PathBuf. The difference is, that PathBuf owns the inner path string, while Path simply provides a reference to it. The relationship is similar to String and &amp;amp;str. Reading the modification time Now let’s deal with the metadata. First, we try to retrieve the modification time from the file. A quick look at the documentation shows us how to do it: use std::fs; let metadata = fs::metadata(&amp;quot;foo.txt&amp;quot;)?; if let Ok(time) = metadata.modified() println!(&amp;quot; :? &amp;quot;, time); The output might not be what you expect: we receive a SystemTime object, which represents the measurement of the system clock. E.g. this code println!(&amp;quot; :? &amp;quot;, SystemTime::now()); &#x2F;&#x2F; Prints: SystemTime tv_sec: 1520554933, tv_nsec: 610406401 But the format that we would like to have is something like this: Mar 9 01:24 Thankfully, there is a library called chrono, which can read this format and convert it into any human readable output we like: let current: DateTime&amp;lt;Local&amp;gt; = DateTime::from(SystemTime::now()); println!(&amp;quot; &amp;quot;, current.format(&amp;quot;%_d %b %H:%M&amp;quot;).to_string()); this prints 9 Mar 01:29 (Yeah, I know it’s getting late.) Armed with that knowledge, we can now read our file modification time. 
cargo add chrono use chrono:: DateTime, Local ; fn run(dir: &amp;amp;PathBuf) -&amp;gt; Result&amp;lt;(), Box&amp;lt;Error&amp;gt;&amp;gt; if dir.is_dir() for entry in fs::read_dir(dir)? let entry = entry?; let file_name = ... let metadata = entry.metadata()?; let size = metadata.len(); let modified: DateTime&amp;lt;Local&amp;gt; = DateTime::from(metadata.modified()?); println!( &amp;quot; :&amp;gt;5 &amp;quot;, size, modified.format(&amp;quot;%_d %b %H:%M&amp;quot;).to_string(), file_name ); Ok(()) This :&amp;gt;5 might look weird. It’s a formatting directive provided by std::fmt. It means “right align this field with a space padding of 5” - just like our bigger brother ls -l is doing it. Similarly, we retrieved the size in bytes with metadata.len(). Unix file permissions are a zoo Reading the file permissions is a bit more tricky. While the rwx notation is very common in Unix derivatives such as *BSD or GNU&#x2F;Linux, many other operating systems ship their own permission management. There are even differences between the Unix derivatives. Wikipedia lists a few extensions to the file permissions that you might encounter: + (plus) suffix indicates an access control list that can control additional permissions. . (dot) suffix indicates an SELinux context is present. Details may be listed with the command ls -Z. @ suffix indicates extended file attributes are present. That just goes to show, that there are a lot of important details to be considered when implementing this in real life. Implementing very basic file mode For now, we just stick to the basics and assume we are on a platform that supports the rwx file mode. Behind the r, the w and the x are in reality octal numbers. That’s easier for computers to work with and many hardcore users even prefer to type the numbers over the symbols. The ruleset behind those octals is as follows. I took that from the chmod manpage. Modes may be absolute or symbolic. 
An absolute mode is an octal number constructed from the sum of one or more of the following values 0400 Allow read by owner. 0200 Allow write by owner. 0100 For files, allow execution by owner. 0040 Allow read by group members. 0020 Allow write by group members. 0010 For files, allow execution by group members. 0004 Allow read by others. 0002 Allow write by others. 0001 For files, allow execution by others. For example, to set the permissions for a file so that the owner can read, write and execute it and nobody else can do anything would be 700 (400 + 200 +100). Granted, those numbers are the same since the 70s and are not going to change soon, but it’s still a bad idea to compare our file permissions directly with the values; if not for compatibility reasons, then for readability and to avoid magic numbers in our code. Therefore, we use the libc crate, which provides constants for those magic numbers. As mentioned above, these file permissions are Unix specific, so we need to import a Unix-only library named std::os::unix::fs::PermissionsExt; for that. extern crate libc; &#x2F;&#x2F; Examples: &#x2F;&#x2F; * `S_IRGRP` stands for &amp;quot;read permission for group&amp;quot;, &#x2F;&#x2F; * `S_IXUSR` stands for &amp;quot;execution permission for user&amp;quot; use libc:: S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH, S_IXUSR ; use std::os::unix::fs::PermissionsExt; We can now get the file permissions like so: let metadata = entry.metadata()?; let mode = metadata.permissions().mode(); parse_permissions(mode as u16); parse_permissions() is a little helper function defined as follows: fn parse_permissions(mode: u16) -&amp;gt; String let user = triplet(mode, S_IRUSR, S_IWUSR, S_IXUSR); let group = triplet(mode, S_IRGRP, S_IWGRP, S_IXGRP); let other = triplet(mode, S_IROTH, S_IWOTH, S_IXOTH); [user, group, other].join(&amp;quot;&amp;quot;) It takes the file mode as a u16 (simply because the libc constants are u16) and calls triplet on it. 
For each flag read, write, and execute, it runs a binary &amp;amp; operation on mode. The output is matched exhaustively against all possible permission patterns. fn triplet(mode: u16, read: u16, write: u16, execute: u16) -&amp;gt; String match (mode &amp;amp; read, mode &amp;amp; write, mode &amp;amp; execute) (0, 0, 0) =&amp;gt; &amp;quot;---&amp;quot;, (_, 0, 0) =&amp;gt; &amp;quot;r--&amp;quot;, (0, _, 0) =&amp;gt; &amp;quot;-w-&amp;quot;, (0, 0, _) =&amp;gt; &amp;quot;--x&amp;quot;, (_, 0, _) =&amp;gt; &amp;quot;r-x&amp;quot;, (_, _, 0) =&amp;gt; &amp;quot;rw-&amp;quot;, (0, _, _) =&amp;gt; &amp;quot;-wx&amp;quot;, (_, _, _) =&amp;gt; &amp;quot;rwx&amp;quot;, .to_string() Wrapping up The final output looks like this. Close enough. &amp;gt; cargo run rw-r--r-- 7 6 Mar 23:10 .gitignore rw-r--r-- 15618 8 Mar 00:41 Cargo.lock rw-r--r-- 185 8 Mar 00:41 Cargo.toml rwxr-xr-x 102 5 Mar 21:31 src rwxr-xr-x 136 6 Mar 23:07 target That’s it! You can find the final version of our toy ls on Github. We are still far away from a full-fledged ls replacement, but at least we learned a thing or two about its internals. If you’re looking for a proper ls replacement written in Rust, go check out lsd. If, instead, you want to read another blog post from the same series, check out A Little Story About the yes Unix Command. " }, { "title": "Rust in 2018", "url": "https://endler.dev/2018/rust-2018/", - "body": "I wrote about the future of Rust before and it seems like nobody stops me from doing it again! Quite the contrary: this time the Rust core team even asked for it.\nI'm a bit late to the party, but here are my 2 cents about the priorities for Rust in 2018.\n\nWho is this guy?\nThere's a depressingly high chance that we've never met before — which is a real shame.\nFor some context: I come from dynamically typed languages like Python and PHP.\nRust was the first language that allowed me to write real low-level code without feeling like arguing with a bouncer. 
\nTo me, Rust is not a fireflower, it's my own personal Megazord1.\nI want Rust to win, but for that, we need to tick a few points off the list.\nCompiler documentation for easier contribution\n\nWhen I was in Columbus, Ohio for Rust Belt Rust, I met Niko Matsakis, Ariel Ben-Yehuda, and Santiago Pastorino.\nThose fine gentlemen eagerly worked on non-lexical lifetimes during the impl-period.\nWatching them hack away on the compiler was deeply inspirational to me, and I started wondering if I could contribute, too.\nNeedless to say, the barrier to entry for hacking on the compiler can be quite high.\nI didn't contribute anything yet.\nOne thing I'd love to do is to spend short 30-60 minute chunks of time to fix a small thing in the compiler here and there. Could be as simple as renaming a variable, writing a test or adding some documentation.\nHence my first wish is, that contributing to the language will become easier.\nThat could be achieved by providing extensive mentorship, more entry-level tickets, and better compiler documentation.\nAll of that was already suggested by Niko.\nMore resources for intermediate programmers\nOn a related note, I'd like to see more talks/guidelines/books targeting intermediate Rust programmers.\nThis includes discussions on how to structure big projects in Rust and Rust-specific design patterns.\nI want to read more about professional Rust usage and see case-studies from various industries.\nFor example, there is a startup called snips, which builds an on-device voice-assistant using Rust.\nThey integrate with C and C++ libraries and I want to hear more about their journey.\nImprove the RFC process\nI try to follow the RFC process very closely, but my time is limited.\nMy wish is, that I can open any RFC and immediately get its status:\n\nA summary of the discussion with major pros and cons.\nA simple usage example, right at the beginning.\nThe next steps towards stabilization.\n\nFor example, if I look at this (not so) random issue, 
I don't even know where to start. What are the biggest blockers right now? Who is actively pushing this forward? How can I help out?\nGithub is great for code, but conversations about new features regularly get out of hand.\nThis is not a problem, that is limited to Rust, either. Just look at other big projects like Docker, Kubernetes, or Node.\nMaybe we need a new tool for that.\nThe usual suspects\nIf I could ask for two stable features in 2018, it would be ? in main \nand non-lexical lifetimes.\nThere's more I could mention of course, but I'm not gonna bore you with faster compile times, impl trait, generators, and the like.\nWe're on a good way here, see Nick Cameron's post instead.\nI'm convinced, that by improving documentation and mentorship, we can grow the number of contributors significantly\nand stabilize many highly-anticipated features this year.\n1. Disclaimer: I never watched a single episode of Power Rangers.↩ \n" + "body": "I wrote about the future of Rust before and it seems like nobody stops me from doing it again! Quite the contrary: this time the Rust core team even asked for it. I’m a bit late to the party, but here are my 2 cents about the priorities for Rust in 2018. Who is this guy? There’s a depressingly high chance that we’ve never met before — which is a real shame. For some context: I come from dynamically typed languages like Python and PHP. Rust was the first language that allowed me to write real low-level code without feeling like arguing with a bouncer. To me, Rust is not a fireflower, it’s my own personal Megazord1. I want Rust to win, but for that, we need to tick a few points off the list. Compiler documentation for easier contribution When I was in Columbus, Ohio for Rust Belt Rust, I met Niko Matsakis, Ariel Ben-Yehuda, and Santiago Pastorino. Those fine gentlemen eagerly worked on non-lexical lifetimes during the impl-period. 
Watching them hack away on the compiler was deeply inspirational to me, and I started wondering if I could contribute, too. Needless to say, the barrier to entry for hacking on the compiler can be quite high. I didn’t contribute anything yet. One thing I’d love to do is to spend short 30-60 minute chunks of time to fix a small thing in the compiler here and there. Could be as simple as renaming a variable, writing a test or adding some documentation. Hence my first wish is, that contributing to the language will become easier. That could be achieved by providing extensive mentorship, more entry-level tickets, and better compiler documentation. All of that was already suggested by Niko. More resources for intermediate programmers On a related note, I’d like to see more talks&#x2F;guidelines&#x2F;books targeting intermediate Rust programmers. This includes discussions on how to structure big projects in Rust and Rust-specific design patterns. I want to read more about professional Rust usage and see case-studies from various industries. For example, there was a startup called snips.ai, which built an on-device voice-assistant using Rust. They integrated with C and C++ libraries and I want to hear more about their journey. Improve the RFC process I try to follow the RFC process very closely, but my time is limited. My wish is, that I can open any RFC and immediately get its status: A summary of the discussion with major pros and cons. A simple usage example, right at the beginning. The next steps towards stabilization. For example, if I look at this (not so) random issue, I don’t even know where to start. What are the biggest blockers right now? Who is actively pushing this forward? How can I help out? Github is great for code, but conversations about new features regularly get out of hand. This is not a problem, that is limited to Rust, either. Just look at other big projects like Docker, Kubernetes, or Node. Maybe we need a new tool for that. 
The usual suspects If I could ask for two stable features in 2018, it would be ? in main and non-lexical lifetimes. There’s more I could mention of course, but I’m not gonna bore you with faster compile times, impl trait, generators, and the like. We’re on a good way here, see Nick Cameron’s post instead. I’m convinced, that by improving documentation and mentorship, we can grow the number of contributors significantly and stabilize many highly-anticipated features this year. 1. Disclaimer: I never watched a single episode of Power Rangers.↩ " }, { "title": "Functional Programming for Mathematical Computing", "url": "https://endler.dev/2018/functional-mathematics/", - "body": "Programming languages help us describe general solutions for problems; the result just happens to be executable by machines. Every programming language comes with a different set of strengths and weaknesses, one reason being that its syntax and semantics heavily influence the range of problems which can easily be tackled with it.\ntl;dr: I think that functional programming is better suited for mathematical computations than the more common imperative approach.\nUsing built-in abstractions for Mathematics\nThe ideas behind a language (the underlying programming paradigms) are distinctive for the community that builds around it. The developers create a unique ecosystem of ready-to-use libraries and frameworks around the language core. As a consequence, some languages are stronger in areas such as business applications (one could think of Cobol), others work great for systems programming (like C or Rust).\nWhen it comes to solving mathematical and numerical problems with computers, Fortran might come to mind. Although Fortran is a general-purpose language, it is mostly known for scientific computing. 
Of course, the language was created with that purpose in mind – hence the name, Formula Translation.\nOne reason for its popularity in this area is that it offers some built-in domain-specific keywords to express mathematical concepts, while keeping an eye on performance. For instance, it has a dedicated datatype for complex numbers – COMPLEX – and a keyword named DIMENSION which is quite similar to the mathematical term and can be used to create arrays and vectors.\nImperative vs functional style\nBuilt-in keywords can help expand the expressiveness of a language into a specific problem space, but this approach is severly limited. It’s not feasible to extend the language core ad infinitum; it would just be harder to maintain and take longer to learn. Therefore, most languages provide other ways of abstraction – like functions, subroutines, classes and objects – to split a routine into smaller, more manageable parts. These mechanisms might help to control the complexity of a program, but especially when dealing with mathematical problems, one has to be careful not to obfuscate the solution with boilerplate code.\nSpecimen I - Factorial\nAs an example, the stated problem might be to translate the following formula, which calculates the factorial of a positive number n, into program code:\n\nAn implementation of the above formula using imperative style Java might look like this:\n\npublic static long fact(final int n) {\n if (n < 0) {\n // Negative numbers not allowed\n return 0;\n }\n long prod = 1;\n for (int i = 1; i <= n; ++i) {\n prod *= i;\n }\n return prod;\n}\n\nThis is quite a long solution for such a short problem definition.\n(Note that writing a version with an explicit loop from 1 to n was on purpose; a recursive function would be shorter, but uses a concept which was not introduced by the mathematical formula.)\nAlso, the program contains many language-specific keywords, such as public, static, and System.err.println(). 
On top of that, the programmer must explicitly provide all data types for the variables in use – a tiresome obligation.\nAll of this obfuscates the mathematical definition.\nCompare this with the following version written in a functional language, like Haskell.\n\nfact n = product [1..n]\n\nThis is an almost direct translation from the problem definition into code. It needs no explicit types, no temporary variables and no access modifiers (such as public).\nSpecimen II - Dot product\nOne could argue that the above Haskell program owes its brevity to the fact, that the language provides just the right abstractions (namely the product keyword and the [1..n] range syntax) for that specific task.\nTherfore let’s examine a simple function which is neither available in Haskell nor in Java: The dot product of two vectors. The mathematical definition is as follows:\n \nFor vectors with three dimensions, it can be written as\n\nFirst, a Haskell implementation:\n\ntype Scalar a = a\ndata Vector a = Vector a a a deriving (Show)\ndot :: (Num a) => Vector a -> Vector a -> Scalar a\n(Vector a1 a2 a3) `dot` (Vector b1 b2 b3) = a1*b1 + a2*b2 + a3*b3\n\nNote, that the mathematical types can be defined in one line each. Further note, that we define the dot function in infix notation, that is, we place the first argument of dot in front of the function name and the second argument behind it. 
This way, the code looks more like its mathematical equivalent.\nAn example call of the above function would be \n\n(Vector 1 2 3) ’dot’ (Vector 3 2 1)\n\nwhich is short, precise and readable.\nNow, a similar implementation in Java.\n\npublic static class Vector<T extends Number> {\n private T x, y, z;\n\n public Vector(T x, T y, T z) {\n this.x = x;\n this.y = y;\n this.z = z;\n }\n\n public double dot(Vector<?> v) {\n return (x.doubleValue() * v.x.doubleValue() +\n y.doubleValue() * v.y.doubleValue() +\n z.doubleValue() * v.z.doubleValue());\n }\n }\n\n public static void main(String[] args) {\n Vector<Integer> a = new Vector<Integer>(3, 2, 1);\n Vector<Integer> b = new Vector<Integer>(1, 2, 3);\n System.out.println(a.dot(b));\n }\n}\n\nFor a proper textual representation of Vectors, the toString() Method would also need to be overwritten. In Haskell, one can simply derive from the Show typeclass as shown in the code.\nCreating new abstractions\nIf functions and types are not sufficient to write straightforward programs, Haskell also offers simple constructs to create new operators and keywords which extend the language core itself. This makes domain-specific-languages feasible and enables the developer to work more directly on the actual problem instead of working around peculiarities of the programming language itself (such as memory management or array iteration). Haskell embraces this concept; Java has no such functionality.\nConclusion\nI'm not trying to bash Java or worship Haskell here. Both languages have their place.\nI merely picked Java, because lots of programmers can read it.\nThe comparison is more between a functional and an imperative approach for numerical and symbolical programming; and for that, I prefer a functional approach every day. It removes clutter and yields elegant solutions. 
It provides convenient methods to work on a high level of abstraction and speak in mathematical terms and still, these strengths are disregarded by many programmers.\nAbraham H. Maslow’s observation in his 1966 book The Psychology of Science seems fitting:\n\n“I suppose it is tempting, if the only tool you have is a hammer, to treat everything as if it were a nail.”\n\n" + "body": "Programming languages help us describe general solutions for problems; the result just happens to be executable by machines. Every programming language comes with a different set of strengths and weaknesses, one reason being that its syntax and semantics heavily influence the range of problems which can easily be tackled with it. tl;dr: I think that functional programming is better suited for mathematical computations than the more common imperative approach. Using built-in abstractions for Mathematics The ideas behind a language (the underlying programming paradigms) are distinctive for the community that builds around it. The developers create a unique ecosystem of ready-to-use libraries and frameworks around the language core. As a consequence, some languages are stronger in areas such as business applications (one could think of Cobol), others work great for systems programming (like C or Rust). When it comes to solving mathematical and numerical problems with computers, Fortran might come to mind. Although Fortran is a general-purpose language, it is mostly known for scientific computing. Of course, the language was created with that purpose in mind – hence the name, Formula Translation. One reason for its popularity in this area is that it offers some built-in domain-specific keywords to express mathematical concepts, while keeping an eye on performance. For instance, it has a dedicated datatype for complex numbers – COMPLEX – and a keyword named DIMENSION which is quite similar to the mathematical term and can be used to create arrays and vectors. 
Imperative vs functional style Built-in keywords can help expand the expressiveness of a language into a specific problem space, but this approach is severly limited. It’s not feasible to extend the language core ad infinitum; it would just be harder to maintain and take longer to learn. Therefore, most languages provide other ways of abstraction – like functions, subroutines, classes and objects – to split a routine into smaller, more manageable parts. These mechanisms might help to control the complexity of a program, but especially when dealing with mathematical problems, one has to be careful not to obfuscate the solution with boilerplate code. Specimen I - Factorial As an example, the stated problem might be to translate the following formula, which calculates the factorial of a positive number n, into program code: An implementation of the above formula using imperative style Java might look like this: public static long fact(final int n) if (n &amp;lt; 0) &#x2F;&#x2F; Negative numbers not allowed return 0; long prod = 1; for (int i = 1; i &amp;lt;= n; ++i) prod *= i; return prod; This is quite a long solution for such a short problem definition. (Note that writing a version with an explicit loop from 1 to n was on purpose; a recursive function would be shorter, but uses a concept which was not introduced by the mathematical formula.) Also, the program contains many language-specific keywords, such as public, static, and System.err.println(). On top of that, the programmer must explicitly provide all data types for the variables in use – a tiresome obligation. All of this obfuscates the mathematical definition. Compare this with the following version written in a functional language, like Haskell. fact n = product [1..n] This is an almost direct translation from the problem definition into code. It needs no explicit types, no temporary variables and no access modifiers (such as public). 
Specimen II - Dot product One could argue that the above Haskell program owes its brevity to the fact, that the language provides just the right abstractions (namely the product keyword and the [1..n] range syntax) for that specific task. Therfore let’s examine a simple function which is neither available in Haskell nor in Java: The dot product of two vectors. The mathematical definition is as follows: For vectors with three dimensions, it can be written as First, a Haskell implementation: type Scalar a = a data Vector a = Vector a a a deriving (Show) dot :: (Num a) =&amp;gt; Vector a -&amp;gt; Vector a -&amp;gt; Scalar a (Vector a1 a2 a3) `dot` (Vector b1 b2 b3) = a1*b1 + a2*b2 + a3*b3 Note, that the mathematical types can be defined in one line each. Further note, that we define the dot function in infix notation, that is, we place the first argument of dot in front of the function name and the second argument behind it. This way, the code looks more like its mathematical equivalent. An example call of the above function would be (Vector 1 2 3) ’dot’ (Vector 3 2 1) which is short, precise and readable. Now, a similar implementation in Java. public static class Vector&amp;lt;T extends Number&amp;gt; private T x, y, z; public Vector(T x, T y, T z) this.x = x; this.y = y; this.z = z; public double dot(Vector&amp;lt;?&amp;gt; v) return (x.doubleValue() * v.x.doubleValue() + y.doubleValue() * v.y.doubleValue() + z.doubleValue() * v.z.doubleValue()); public static void main(String[] args) Vector&amp;lt;Integer&amp;gt; a = new Vector&amp;lt;Integer&amp;gt;(3, 2, 1); Vector&amp;lt;Integer&amp;gt; b = new Vector&amp;lt;Integer&amp;gt;(1, 2, 3); System.out.println(a.dot(b)); For a proper textual representation of Vectors, the toString() Method would also need to be overwritten. In Haskell, one can simply derive from the Show typeclass as shown in the code. 
Creating new abstractions If functions and types are not sufficient to write straightforward programs, Haskell also offers simple constructs to create new operators and keywords which extend the language core itself. This makes domain-specific-languages feasible and enables the developer to work more directly on the actual problem instead of working around peculiarities of the programming language itself (such as memory management or array iteration). Haskell embraces this concept; Java has no such functionality. Conclusion I’m not trying to bash Java or worship Haskell here. Both languages have their place. I merely picked Java, because lots of programmers can read it. The comparison is more between a functional and an imperative approach for numerical and symbolical programming; and for that, I prefer a functional approach every day. It removes clutter and yields elegant solutions. It provides convenient methods to work on a high level of abstraction and speak in mathematical terms and still, these strengths are disregarded by many programmers. Abraham H. 
Maslow’s observation in his 1966 book The Psychology of Science seems fitting: “I suppose it is tempting, if the only tool you have is a hammer, to treat everything as if it were a nail.” " }, { "title": "Rust for Rubyists", "url": "https://endler.dev/2017/rust-for-rubyists/", - "body": "Recently I came across a delightful article on idiomatic Ruby.\nI'm not a good Ruby developer by any means, but I realized, that a lot of the patterns are also quite common in Rust.\nWhat follows is a side-by-side comparison of idiomatic code in both languages.\nThe Ruby code samples are from the original article.\nMap and Higher-Order Functions\nThe first example is a pretty basic iteration over elements of a container using map.\n\n\n\nuser_ids = users.map { |user| user.id }\n\nThe map concept is also pretty standard in Rust.\nCompared to Ruby, we need to be a little more explicit here:\nIf users is a vector of User objects, we first need to create an iterator from it:\n\n\n\nlet user_ids = users.iter().map(|user| user.id);\n\nYou might say that's quite verbose, but this additional abstraction allows us to express an important concept:\nwill the iterator take ownership of the vector, or will it not?\n\nWith iter(), you get a "read-only view" into the vector. After the iteration, it will be unchanged.\nWith into_iter(), you take ownership over the vector. After the iteration, the vector will be gone.\nIn Rust terminology, it will have moved.\nRead some more about the difference between iter() and into_iter() here.\n\nThe above Ruby code can be simplified like this:\n\n\n\nuser_ids = users.map(&:id)\n\nIn Ruby, higher-order functions (like map) take blocks or procs as an argument and the language provides a convenient shortcut for method invocation — &:id is the same as {|o| o.id()}.\nSomething similar could be done in Rust:\n\n\n\nlet id = |u: &User| u.id;\nlet user_ids = users.iter().map(id);\n\nThis is probably not the most idiomatic way to do it, though. 
What you will see more often is the use of Universal Function Call Syntax in this case:1\n\n\n\nlet user_ids = users.iter().map(User::id);\n\nIn Rust, higher-order functions take functions as an argument. Therefore users.iter().map(Users::id) is more or less equivalent to users.iter().map(|u| u.id()).2\nAlso, map() in Rust returns another iterator and not a collection.\nIf you want a collection, you would have to run collect() on that, as we'll see later.\nIteration with Each\nSpeaking of iteration, one pattern that I see a lot in Ruby code is this:\n\n\n\n["Ruby", "Rust", "Python", "Cobol"].each do |lang|\n puts "Hello #{lang}!"\nend\n\nSince Rust 1.21, this is now also possible:\n\n\n\n["Ruby", "Rust", "Python", "Cobol"]\n .iter()\n .for_each(|lang| println!("Hello {lang}!", lang = lang));\n\nAlthough, more commonly one would write that as a normal for-loop in Rust:\n\n\n\nfor lang in ["Ruby", "Rust", "Python", "Cobol"].iter() {\n println!("Hello {lang}!", lang = lang);\n}\n\nSelect and filter\nLet's say you want to extract only even numbers from a collection in Ruby.\n\n\n\neven_numbers = [1, 2, 3, 4, 5].map { |element| element if element.even? } # [ni, 2, nil, 4, nil]\neven_numbers = even_numbers.compact # [2, 4]\n\nIn this example, before calling compact, our even_numbers array had nil entries.\nWell, in Rust there is no concept of nil or Null. You don't need a compact.\nAlso, map doesn't take predicates. You would use filter for that:\n\n\n\nlet even_numbers = vec![1, 2, 3, 4, 5]\n .iter()\n .filter(|&element| element % 2 == 0);\n\nor, to make a vector out of the result\n\n\n\n// Result: [2, 4]\nlet even_numbers: Vec<i64> = vec![1, 2, 3, 4, 5]\n .into_iter()\n .filter(|element| element % 2 == 0).collect();\n\nSome hints:\n\nI'm using the type hint Vec<i64> here because, without it, Rust does not know what collection I want to build when calling collect.\nvec! is a macro for creating a vector.\nInstead of iter, I use into_iter. 
This way, I take ownership of the elements in the vector. With iter() I would get a Vec<&i64> instead.\n\nIn Rust, there is no even method on numbers, but that doesn't keep us from defining one!\n\n\n\nlet even = |x: &i64| x % 2 == 0;\nlet even_numbers = vec![1, 2, 3, 4, 5].into_iter().filter(even);\n\nIn a real-world scenario, you would probably use a third-party package (crate) like num for numerical mathematics:\n\n\n\nextern crate num;\nuse num::Integer;\n\nfn main() {\n let even_numbers: Vec<i64> = vec![1, 2, 3, 4, 5]\n .into_iter()\n .filter(|x| x.is_even()).collect();\n}\n\nIn general, it's quite common to use crates in Rust for functionality that is not in the standard lib.\nPart of the reason why this is so well accepted is that cargo is such a rad package manager.\n(Maybe because it was built by no other than Yehuda Katz of Ruby fame. 😉)\nAs mentioned before, Rust does not have nil. However, there is still the concept of operations that can fail.\nThe canonical type to express that is called Result.\nLet's say you want to convert a vector of strings to integers.\n\n\n\nlet maybe_numbers = vec!["1", "2", "nah", "nope", "3"];\nlet numbers: Vec<_> = maybe_numbers\n .into_iter()\n .map(|i| i.parse::<u64>())\n .collect();\n\nThat looks nice, but maybe the output is a little unexpected. numbers will also contain the parsing errors:\n\n\n\n[Ok(1), Ok(2), Err(ParseIntError { kind: InvalidDigit }), Err(ParseIntError { kind: InvalidDigit }), Ok(3)]\n\nSometimes you're just interested in the successful operations.\nAn easy way to filter out the errors is to use filter_map:\n\n\n\nlet maybe_numbers = vec!["1", "2", "nah", "nope", "3"];\nlet numbers: Vec<_> = maybe_numbers\n .into_iter()\n .filter_map(|i| i.parse::<u64>().ok())\n .collect();\n\nI changed two things here:\n\nInstead of map, I'm now using filter_map.\nparse returns a Result, but filter_map expects an Option. 
We can convert a Result into an Option by calling ok() on it3.\n\nThe return value contains all successfully converted strings:\n\n\n\n[1, 2, 3]\n\nThe filter_map is similar to the select method in Ruby:\n\n\n\n[1, 2, 3, 4, 5].select { |element| element.even? }\n\nRandom numbers\nHere's how to get a random number from an array in Ruby:\n\n\n\n[1, 2, 3].sample\n\nThat's quite nice and idiomatic!\nCompare that to Rust:\n\n\n\nlet mut rng = thread_rng();\nrng.choose(&[1, 2, 3, 4, 5])\n\nFor the code to work, you need the rand crate. Click on the snippet for a running example.\nThere are some differences to Ruby. Namely, we need to be more explicit about what random number generator\nwe want exactly. We decide for a lazily-initialized thread-local random number generator, seeded by the system.\nIn this case, I'm using a slice instead of a vector. The main difference is that the slice has a fixed size while the vector does not.\nWithin the standard library, Rust doesn't have a sample or choose method on the slice itself. \nThat's a design decision: the core of the language is kept small to allow evolving the language in the future.\nThis doesn't mean that you cannot have a nicer implementation today.\nFor instance, you could define a Choose trait and implement it for [T].\n\n\n\nextern crate rand;\nuse rand::{thread_rng, Rng};\n\ntrait Choose<T> {\n fn choose(&self) -> Option<&T>;\n}\n\nimpl<T> Choose<T> for [T] {\n fn choose(&self) -> Option<&T> {\n let mut rng = thread_rng();\n rng.choose(&self)\n }\n}\n\nThis boilerplate could be put into a crate to make it reusable for others.\nWith that, we arrive at a solution that rivals Ruby's elegance.\n\n\n\n[1, 2, 4, 8, 16, 32].choose()\n\nImplicit returns and expressions\nRuby methods automatically return the result of the last statement.\n\n\n\ndef get_user_ids(users)\n users.map(&:id)\nend\n\nSame for Rust. 
Note the missing semicolon.\n\n\n\nfn get_user_ids(users: &[User]) -> Vec<u64> {\n users.iter().map(|user| user.id).collect()\n}\n\nBut in Rust, this is just the beginning, because everything is an expression.\nThe following block splits a string into characters, removes the h, and returns the result as a HashSet.\nThis HashSet will be assigned to x.\n\n\n\nlet x: HashSet<_> = {\n // Get unique chars of a word {'h', 'e', 'l', 'o'}\n let unique = "hello".chars();\n // filter out the 'h'\n unique.filter(|&char| char != 'h').collect()\n};\n\nSame works for conditions:\n\n\n\nlet x = if 1 > 0 { "absolutely!" } else { "no seriously" };\n\nSince a match statement is also an expression, you can assign the result to a variable, too!\n\n\n\nenum Unit {\n Meter,\n Yard,\n Angstroem,\n Lightyear,\n}\n\nlet length_in_meters = match unit {\n Unit::Meter => 1.0,\n Unit::Yard => 0.91,\n Unit::Angstroem => 0.0000000001,\n Unit::Lightyear => 9.461e+15,\n};\n\nMultiple Assignments\nIn Ruby you can assign multiple values to variables in one step:\n\n\n\ndef values\n [1, 2, 3]\nend\n\none, two, three = values\n\nIn Rust, you can only decompose tuples into tuples, but not a vector into a tuple for example.\nSo this will work:\n\n\n\nlet (one, two, three) = (1, 2, 3);\n\nBut this won't:\n\n\n\nlet (one, two, three) = [1, 2, 3];\n// ^^^^^^^^^^^^^^^^^ expected array of 3 elements, found tuple\n\nNeither will this:\n\n\n\nlet (one, two, three) = [1, 2, 3].iter().collect();\n// a collection of type `(_, _, _)` cannot be built from an iterator over elements of type `&{integer}`\n\nBut with nightly Rust, you can now do this:\n\n\n\nlet [one, two, three] = [1, 2, 3];\n\nOn the other hand, there's a lot more you can do with destructuring apart from multiple assignments. 
You can write beautiful, ergonomic code using pattern syntax.\n\n\n\nlet x = 4;\nlet y = false;\n\nmatch x {\n 4 | 5 | 6 if y => println!("yes"),\n _ => println!("no"),\n}\n\nTo quote The Book:\n\nThis prints no since the if condition applies to the whole pattern 4 | 5 | 6, not only to the last value 6.\n\nString interpolation\nRuby has extensive string interpolation support.\n\n\n\nprogramming_language = "Ruby"\n"#{programming_language} is a beautiful programming language"\n\nThis can be translated like so:\n\n\n\nlet programming_language = "Rust";\nformat!("{} is also a beautiful programming language", programming_language);\n\nNamed arguments are also possible, albeit much less common:\n\n\n\nprintln!("{language} is also a beautiful programming language", language="Rust");\n\nRust's println!() syntax is even more extensive than Ruby's. Check the docs if you're curious about what else you can do.\nThat’s it!\nRuby comes with syntactic sugar for many common usage patterns, which allows for very elegant code.\nLow-level programming and raw performance are no primary goals of the language.\nIf you do need that, Rust might be a good fit, because it provides fine-grained hardware control with comparable ergonomics.\nIf in doubt, Rust favors explicitness, though; it eschews magic.\nDid I whet your appetite for idiomatic Rust? Have a look at this Github project. I'd be thankful for contributions.\nFootnotes\n1. Thanks to Florian Gilcher for the hint.↩\n2. Thanks to masklin for pointing out multiple inaccuracies.↩\n3. In the first version, I sait that ok() would convert a Result into a boolean, which was wrong. Thanks to isaacg for the correction.↩\n" + "body": "Recently I came across a delightful article on idiomatic Ruby. I’m not a good Ruby developer by any means, but I realized, that a lot of the patterns are also quite common in Rust. What follows is a side-by-side comparison of idiomatic code in both languages. The Ruby code samples are from the original article. 
Map and Higher-Order Functions The first example is a pretty basic iteration over elements of a container using map. user_ids = users.map |user| user.id The map concept is also pretty standard in Rust. Compared to Ruby, we need to be a little more explicit here: If users is a vector of User objects, we first need to create an iterator from it: let user_ids = users.iter().map(|user| user.id); You might say that’s quite verbose, but this additional abstraction allows us to express an important concept: will the iterator take ownership of the vector, or will it not? With iter(), you get a “read-only view” into the vector. After the iteration, it will be unchanged. With into_iter(), you take ownership over the vector. After the iteration, the vector will be gone. In Rust terminology, it will have moved. Read some more about the difference between iter() and into_iter() here. The above Ruby code can be simplified like this: user_ids = users.map(&amp;amp;:id) In Ruby, higher-order functions (like map) take blocks or procs as an argument and the language provides a convenient shortcut for method invocation — &amp;amp;:id is the same as |o| o.id() . Something similar could be done in Rust: let id = |u: &amp;amp;User| u.id; let user_ids = users.iter().map(id); This is probably not the most idiomatic way to do it, though. What you will see more often is the use of Universal Function Call Syntax in this case:1 let user_ids = users.iter().map(User::id); In Rust, higher-order functions take functions as an argument. Therefore users.iter().map(Users::id) is more or less equivalent to users.iter().map(|u| u.id()).2 Also, map() in Rust returns another iterator and not a collection. If you want a collection, you would have to run collect() on that, as we’ll see later. 
Iteration with Each Speaking of iteration, one pattern that I see a lot in Ruby code is this: [&amp;quot;Ruby&amp;quot;, &amp;quot;Rust&amp;quot;, &amp;quot;Python&amp;quot;, &amp;quot;Cobol&amp;quot;].each do |lang| puts &amp;quot;Hello # lang !&amp;quot; end Since Rust 1.21, this is now also possible: [&amp;quot;Ruby&amp;quot;, &amp;quot;Rust&amp;quot;, &amp;quot;Python&amp;quot;, &amp;quot;Cobol&amp;quot;] .iter() .for_each(|lang| println!(&amp;quot;Hello lang !&amp;quot;, lang = lang)); Although, more commonly one would write that as a normal for-loop in Rust: for lang in [&amp;quot;Ruby&amp;quot;, &amp;quot;Rust&amp;quot;, &amp;quot;Python&amp;quot;, &amp;quot;Cobol&amp;quot;].iter() println!(&amp;quot;Hello lang !&amp;quot;, lang = lang); Select and filter Let’s say you want to extract only even numbers from a collection in Ruby. even_numbers = [1, 2, 3, 4, 5].map |element| element if element.even? # [ni, 2, nil, 4, nil] even_numbers = even_numbers.compact # [2, 4] In this example, before calling compact, our even_numbers array had nil entries. Well, in Rust there is no concept of nil or Null. You don’t need a compact. Also, map doesn’t take predicates. You would use filter for that: let even_numbers = vec![1, 2, 3, 4, 5] .iter() .filter(|&amp;amp;element| element % 2 == 0); or, to make a vector out of the result &#x2F;&#x2F; Result: [2, 4] let even_numbers: Vec&amp;lt;i64&amp;gt; = vec![1, 2, 3, 4, 5] .into_iter() .filter(|element| element % 2 == 0).collect(); Some hints: I’m using the type hint Vec&amp;lt;i64&amp;gt; here because, without it, Rust does not know what collection I want to build when calling collect. vec! is a macro for creating a vector. Instead of iter, I use into_iter. This way, I take ownership of the elements in the vector. With iter() I would get a Vec&amp;lt;&amp;amp;i64&amp;gt; instead. In Rust, there is no even method on numbers, but that doesn’t keep us from defining one! 
let even = |x: &amp;amp;i64| x % 2 == 0; let even_numbers = vec![1, 2, 3, 4, 5].into_iter().filter(even); In a real-world scenario, you would probably use a third-party package (crate) like num for numerical mathematics: extern crate num; use num::Integer; fn main() let even_numbers: Vec&amp;lt;i64&amp;gt; = vec![1, 2, 3, 4, 5] .into_iter() .filter(|x| x.is_even()).collect(); In general, it’s quite common to use crates in Rust for functionality that is not in the standard lib. Part of the reason why this is so well accepted is that cargo is such a rad package manager. (Maybe because it was built by no other than Yehuda Katz of Ruby fame. 😉) As mentioned before, Rust does not have nil. However, there is still the concept of operations that can fail. The canonical type to express that is called Result. Let’s say you want to convert a vector of strings to integers. let maybe_numbers = vec![&amp;quot;1&amp;quot;, &amp;quot;2&amp;quot;, &amp;quot;nah&amp;quot;, &amp;quot;nope&amp;quot;, &amp;quot;3&amp;quot;]; let numbers: Vec&amp;lt;_&amp;gt; = maybe_numbers .into_iter() .map(|i| i.parse::&amp;lt;u64&amp;gt;()) .collect(); That looks nice, but maybe the output is a little unexpected. numbers will also contain the parsing errors: [Ok(1), Ok(2), Err(ParseIntError kind: InvalidDigit ), Err(ParseIntError kind: InvalidDigit ), Ok(3)] Sometimes you’re just interested in the successful operations. An easy way to filter out the errors is to use filter_map: let maybe_numbers = vec![&amp;quot;1&amp;quot;, &amp;quot;2&amp;quot;, &amp;quot;nah&amp;quot;, &amp;quot;nope&amp;quot;, &amp;quot;3&amp;quot;]; let numbers: Vec&amp;lt;_&amp;gt; = maybe_numbers .into_iter() .filter_map(|i| i.parse::&amp;lt;u64&amp;gt;().ok()) .collect(); I changed two things here: Instead of map, I’m now using filter_map. parse returns a Result, but filter_map expects an Option. We can convert a Result into an Option by calling ok() on it3. 
The return value contains all successfully converted strings: [1, 2, 3] The filter_map is similar to the select method in Ruby: [1, 2, 3, 4, 5].select |element| element.even? Random numbers Here’s how to get a random number from an array in Ruby: [1, 2, 3].sample That’s quite nice and idiomatic! Compare that to Rust: let mut rng = thread_rng(); rng.choose(&amp;amp;[1, 2, 3, 4, 5]) For the code to work, you need the rand crate. Click on the snippet for a running example. There are some differences to Ruby. Namely, we need to be more explicit about what random number generator we want exactly. We decide for a lazily-initialized thread-local random number generator, seeded by the system. In this case, I’m using a slice instead of a vector. The main difference is that the slice has a fixed size while the vector does not. Within the standard library, Rust doesn’t have a sample or choose method on the slice itself. That’s a design decision: the core of the language is kept small to allow evolving the language in the future. This doesn’t mean that you cannot have a nicer implementation today. For instance, you could define a Choose trait and implement it for [T]. extern crate rand; use rand:: thread_rng, Rng ; trait Choose&amp;lt;T&amp;gt; fn choose(&amp;amp;self) -&amp;gt; Option&amp;lt;&amp;amp;T&amp;gt;; impl&amp;lt;T&amp;gt; Choose&amp;lt;T&amp;gt; for [T] fn choose(&amp;amp;self) -&amp;gt; Option&amp;lt;&amp;amp;T&amp;gt; let mut rng = thread_rng(); rng.choose(&amp;amp;self) This boilerplate could be put into a crate to make it reusable for others. With that, we arrive at a solution that rivals Ruby’s elegance. [1, 2, 4, 8, 16, 32].choose() Implicit returns and expressions Ruby methods automatically return the result of the last statement. def get_user_ids(users) users.map(&amp;amp;:id) end Same for Rust. Note the missing semicolon. 
fn get_user_ids(users: &amp;amp;[User]) -&amp;gt; Vec&amp;lt;u64&amp;gt; users.iter().map(|user| user.id).collect() But in Rust, this is just the beginning, because everything is an expression. The following block splits a string into characters, removes the h, and returns the result as a HashSet. This HashSet will be assigned to x. let x: HashSet&amp;lt;_&amp;gt; = &#x2F;&#x2F; Get unique chars of a word &amp;#39;h&amp;#39;, &amp;#39;e&amp;#39;, &amp;#39;l&amp;#39;, &amp;#39;o&amp;#39; let unique = &amp;quot;hello&amp;quot;.chars(); &#x2F;&#x2F; filter out the &amp;#39;h&amp;#39; unique.filter(|&amp;amp;char| char != &amp;#39;h&amp;#39;).collect() ; Same works for conditions: let x = if 1 &amp;gt; 0 &amp;quot;absolutely!&amp;quot; else &amp;quot;no seriously&amp;quot; ; Since a match statement is also an expression, you can assign the result to a variable, too! enum Unit Meter, Yard, Angstroem, Lightyear, let length_in_meters = match unit Unit::Meter =&amp;gt; 1.0, Unit::Yard =&amp;gt; 0.91, Unit::Angstroem =&amp;gt; 0.0000000001, Unit::Lightyear =&amp;gt; 9.461e+15, ; Multiple Assignments In Ruby you can assign multiple values to variables in one step: def values [1, 2, 3] end one, two, three = values In Rust, you can only decompose tuples into tuples, but not a vector into a tuple for example. So this will work: let (one, two, three) = (1, 2, 3); But this won’t: let (one, two, three) = [1, 2, 3]; &#x2F;&#x2F; ^^^^^^^^^^^^^^^^^ expected array of 3 elements, found tuple Neither will this: let (one, two, three) = [1, 2, 3].iter().collect(); &#x2F;&#x2F; a collection of type `(_, _, _)` cannot be built from an iterator over elements of type `&amp;amp; integer ` But with nightly Rust, you can now do this: let [one, two, three] = [1, 2, 3]; On the other hand, there’s a lot more you can do with destructuring apart from multiple assignments. You can write beautiful, ergonomic code using pattern syntax. 
let x = 4; let y = false; match x 4 | 5 | 6 if y =&amp;gt; println!(&amp;quot;yes&amp;quot;), _ =&amp;gt; println!(&amp;quot;no&amp;quot;), To quote The Book: This prints no since the if condition applies to the whole pattern 4 | 5 | 6, not only to the last value 6. String interpolation Ruby has extensive string interpolation support. programming_language = &amp;quot;Ruby&amp;quot; &amp;quot;# programming_language is a beautiful programming language&amp;quot; This can be translated like so: let programming_language = &amp;quot;Rust&amp;quot;; format!(&amp;quot; is also a beautiful programming language&amp;quot;, programming_language); Named arguments are also possible, albeit much less common: println!(&amp;quot; language is also a beautiful programming language&amp;quot;, language=&amp;quot;Rust&amp;quot;); Rust’s println!() syntax is even more extensive than Ruby’s. Check the docs if you’re curious about what else you can do. That’s it! Ruby comes with syntactic sugar for many common usage patterns, which allows for very elegant code. Low-level programming and raw performance are no primary goals of the language. If you do need that, Rust might be a good fit, because it provides fine-grained hardware control with comparable ergonomics. If in doubt, Rust favors explicitness, though; it eschews magic. Did I whet your appetite for idiomatic Rust? Have a look at this Github project. I’d be thankful for contributions. Footnotes 1. Thanks to Florian Gilcher for the hint.↩ 2. Thanks to masklin for pointing out multiple inaccuracies.↩ 3. In the first version, I sait that ok() would convert a Result into a boolean, which was wrong. 
Thanks to isaacg for the correction.↩ " }, { "title": "Making Myself Obsolete", "url": "https://endler.dev/2017/obsolete/", - "body": "\n \n \n The Stegosaurus had better days 150 million years ago.\n \n Paleontologists once thought it had a brain in its butt.\n \n\nIn December 2015 I was looking for static analysis tools to integrate into trivago's CI process.\nThe idea was to detect typical programming mistakes automatically.\nThat's quite a common thing, and there are lots of helpful tools out there which fit the bill.\nSo I looked for a list of tools...\nTo my surprise, the only list I found was on Wikipedia — and it was outdated.\nThere was no such project on Github, where most modern static analysis tools were hosted.\nWithout overthinking it, I opened up my editor and wrote down a few tools I found through my initial research. After that, I pushed the list to Github.\nI called the project Awesome Static Analysis.\nFast forward two years and the list has grown quite a bit.\nSo far, it has 75 contributors, 277 forks and received over 2000 stars. (Thanks for all the support!)\n(Update May 2018: 91 contributors, 363 forks, over 3000 stars)\nAround 1000 unique visitors find the list every week. Not much by any means, but I feel obliged to keep it up-to-date\nbecause it has become an essential source of information for many people.\nIt now lists around 300 tools for static analysis. Everything from Ada to TypeScript is on there.\nWhat I find particularly motivating is, that now the authors themselves create pull requests to add their tools!\nThere was one problem though: The list of pull requests got longer and longer, as I was busy doing other things.\n\nAdding contributors\nI always try to make team members out of regular contributors. My friend and colleague Andy Grunwald as well as Ouroboros Chrysopoeia are both valuable collaborators. 
They help me weed out new PRs whenever they find the time.\nBut let's face it: checking the pull requests is a dull, manual task.\nWhat needs to be checked for each new tool can be summarized like this:\n\nFormatting rules are satisfied\nProject URL is reachable\nLicense annotation is correct\nTools of each section are alphabetically ordered\nDescription is not too long\n\nI guess it's obvious what we should do with that checklist: automate it!\nA linter for linting linters\nSo why not write an analysis tool, which checks our list of analysis tools!\nWhat sounds pretty meta, is actually pretty straightforward.\nWith every pull request, we trigger our bot, which checks the above rules and responds with a result.\nThe first step was to read the Github documentation about building a CI server.\nJust for fun, I wanted to create the bot in Rust.\nThe two most popular Github clients for Rust were github-rs and hubcaps.\nBoth looked pretty neat, but then I found afterparty, a "Github webhook server".\nThe example looked fabulous:\n\n#[macro_use]\nextern crate log;\nextern crate env_logger;\nextern crate afterparty;\nextern crate hyper;\n\nuse afterparty::{Delivery, Hub};\n\nuse hyper::Server;\n\npub fn main() {\n env_logger::init().unwrap();\n let addr = format!("0.0.0.0:{}", 4567);\n let mut hub = Hub::new();\n hub.handle("pull_request", |delivery: &Delivery| {\n match delivery.payload {\n Event::PullRequest { ref action, ref sender, .. } => {\n // TODO: My code here!\n println!("sender {} action {}", sender.login, action)\n }\n _ => (),\n }\n });\n let srvc = Server::http(&addr[..])\n .unwrap()\n .handle(hub);\n println!("listening on {}", addr);\n srvc.unwrap();\n}\n\nThis allowed me to focus on the actual analysis code,\nwhich makes for a pretty boring read. 
It mechanically checks for the things mentioned above and could be written in any language.\nIf you want to have a look (or even contribute!), check out the repo.\nTalking to Github\nAfter the analysis code was done, I had a bot, running locally, waiting for incoming pull requests.\nBut how could I talk to Github?\nI found out, that I should use the Status API\nand send a POST request to /repos/mre/awesome-static-analysis/statuses/:sha\n(:sha is the commit ID that points to the HEAD of the pull request):\n\n{\n "state": "success",\n "description": "The build succeeded!"\n}\n\nI could have used one of the existing Rust Github clients, but I decided to write a simple function to update the pull request status code.\n\nfn set_status(status: Status, desc: String, repo: &str, sha: &str) -> Result<reqwest::Response> {\n let token = env::var("GITHUB_TOKEN")?;\n let client = reqwest::Client::new();\n let mut params = HashMap::new();\n params.insert("state", format!("{}", status));\n params.insert("description", desc);\n println!("Sending status: {:#?}", params);\n\n let status_url = format!("https://api.github.com/repos/{}/statuses/{}", repo, sha);\n println!("Status url: {}", status_url);\n Ok(client\n .request(\n reqwest::Method::Post,\n &format!(\n "{}?access_token={}",\n status_url,\n token,\n ),\n )\n .json(&params)\n .send()?)\n}\n\nYou can see that I pass in a Github token from the environment and then I send the JSON payload as a post request using the reqwest library.\nThat turned out to become a problem in the end: while afterparty was using version 0.9 of hyper, reqwest was using 0.11. Unfortunately, these two versions depend on a different build of the openssl-sys bindings. 
That's a well-known problem and the only way to fix it, is to resolve the conflict.\nI was stuck for a while, but then I saw, that there was an open pull request to upgrade afterparty to hyper 0.10.\nSo inside my Cargo.toml, I locked the version of afterparty to the version of the pull request:\n\n[dependencies]\nafterparty = { git = "https://github.com/ms705/afterparty" }\n\nThis fixed the build, and I could finally move on.\nDeployment\nI needed a place to host the bot.\nPreferably for free, as it was a non-profit Open Source project.\nAlso, the provider would have to run binaries.\nFor quite some time, I was following a product named zeit.\nIt runs any Docker container using an intuitive command line interface called now.\n\n \nYour browser does not support playing mp4 files.\n \nI fell in love the first time I saw their demo on the site, so I wanted to give it a try.\nSo I added a multi-stage Dockerfile to my project:\n\nFROM rust as builder\nCOPY . /usr/src/app \nWORKDIR /usr/src/app \nRUN cargo build --release\n\nFROM debian:stretch\nRUN apt update \\\n && apt install -y libssl1.1 ca-certificates \\\n && apt clean -y \\\n && apt autoclean -y \\\n && apt autoremove -y\nCOPY --from=builder target/release/check .\nEXPOSE 4567\nENTRYPOINT ["./check"]\nCMD ["--help"]\n\nThe first part would build a static binary, the second part would run it at container startup.\nWell, that didn't work, because zeit does not support multi-stage builds yet.\nThe workaround was to split up the Dockerfile into two and connect them both with a Makefile. 
Makefiles are pretty powerful, you know?\nWith that, I had all the parts for deployment together.\n\n# Build Rust binary for Linux\ndocker run --rm -v $(CURDIR):/usr/src/ci -w /usr/src/ci rust cargo build --release\n\n# Deploy Docker images built from the local Dockerfile\nnow deploy --force --public -e GITHUB_TOKEN=${GITHUB_TOKEN}\n\n# Set domain name of new build to `check.now.sh`\n# (The deployment URL was copied to the clipboard and is retrieved with pbpaste on macOS)\nnow alias `pbpaste` check.now.sh\n\nHere's the output of the deploy using now:\n\n> Deploying ~/Code/private/awesome-static-analysis-ci/deploy\n> Ready! https://deploy-sjbiykfvtx.now.sh (copied to clipboard) [2s]\n> Initializing…\n> Initializing…\n> Building\n> ▲ docker build\nSending build context to Docker daemon 2.048 kBkB\n> Step 1 : FROM mre0/ci:latest\n> latest: Pulling from mre0/ci\n> ...\n> Digest: sha256:5ad07c12184755b84ca1b587e91b97c30f7d547e76628645a2c23dc1d9d3fd4b\n> Status: Downloaded newer image for mre0/ci:latest\n> ---> 8ee1b20de28b\n> Successfully built 8ee1b20de28b\n> ▲ Storing image\n> ▲ Deploying image\n> ▲ Container started\n> listening on 0.0.0.0:4567\n> Deployment complete!\n\nThe last step was to add check.now.sh as a webhook inside the awesome-static-analysis project settings.\nNow, whenever a new pull request is coming in, you see that little bot getting active!\n\nOutcome and future plans\nI am very pleased with my choice of tools: afterparty saved me from a lot of manual work, while zeit made deployment really easy.\nIt feels like Amazon Lambda on steroids.\nIf you look at the code and the commits for my bot, you can see all my little missteps, until I got everything just right. Turns out, parsing human-readable text is tedious.\nTherefore I was thinking about turning the list of analysis tools into a structured format like YAML. 
This would greatly simplify the parsing and have the added benefit of having a machine-readable list of tools that can be used for other projects.\nUpdate May 2018\nWhile attending the WeAreDevelopers conference in Vienna (can recommend that), I moved the CI pipeline from zeit.co to Travis CI.\nThe reason was, that I wanted the linting code next to the project, which greatly simplified things.\nFirst and foremost I don't need the web request handling code anymore, because travis takes care of that.\nIf you like, you can compare the old and the new version.\n" + "body": " The Stegosaurus had better days 150 million years ago. Source: Paleontologists once thought it had a brain in its butt. In December 2015 I was looking for static analysis tools to integrate into trivago’s CI process. The idea was to detect typical programming mistakes automatically. That’s quite a common thing, and there are lots of helpful tools out there which fit the bill. So I looked for a list of tools… To my surprise, the only list I found was on Wikipedia — and it was outdated. There was no such project on Github, where most modern static analysis tools were hosted. Without overthinking it, I opened up my editor and wrote down a few tools I found through my initial research. After that, I pushed the list to Github. I called the project Awesome Static Analysis. Fast forward two years and the list has grown quite a bit. So far, it has 75 contributors, 277 forks and received over 2000 stars. (Thanks for all the support!) (Update May 2018: 91 contributors, 363 forks, over 3000 stars) Around 1000 unique visitors find the list every week. Not much by any means, but I feel obliged to keep it up-to-date because it has become an essential source of information for many people. It now lists around 300 tools for static analysis. Everything from Ada to TypeScript is on there. What I find particularly motivating is, that now the authors themselves create pull requests to add their tools! 
There was one problem though: The list of pull requests got longer and longer, as I was busy doing other things. The list of Github Pull requests for awesome-static-analysis Adding contributors I always try to make team members out of regular contributors. My friend and colleague Andy Grunwald as well as Ouroboros Chrysopoeia are both valuable collaborators. They help me weed out new PRs whenever they find the time. But let’s face it: checking the pull requests is a dull, manual task. What needs to be checked for each new tool can be summarized like this: Formatting rules are satisfied Project URL is reachable License annotation is correct Tools of each section are alphabetically ordered Description is not too long I guess it’s obvious what we should do with that checklist: automate it! A linter for linting linters So why not write an analysis tool, which checks our list of analysis tools! What sounds pretty meta, is actually pretty straightforward. With every pull request, we trigger our bot, which checks the above rules and responds with a result. The first step was to read the Github documentation about building a CI server. Just for fun, I wanted to create the bot in Rust. The two most popular Github clients for Rust were github-rs (now deprecated) and hubcaps. Both looked pretty neat, but then I found afterparty, a “Github webhook server”. The example looked fabulous: #[macro_use] extern crate log; extern crate env_logger; extern crate afterparty; extern crate hyper; use afterparty:: Delivery, Hub ; use hyper::Server; pub fn main() env_logger::init().unwrap(); let addr = format!(&amp;quot;0.0.0.0: &amp;quot;, 4567); let mut hub = Hub::new(); hub.handle(&amp;quot;pull_request&amp;quot;, |delivery: &amp;amp;Delivery| match delivery.payload Event::PullRequest ref action, ref sender, .. =&amp;gt; &#x2F;&#x2F; TODO: My code here! 
println!(&amp;quot;sender action &amp;quot;, sender.login, action) _ =&amp;gt; (), ); let srvc = Server::http(&amp;amp;addr[..]) .unwrap() .handle(hub); println!(&amp;quot;listening on &amp;quot;, addr); srvc.unwrap(); This allowed me to focus on the actual analysis code, which makes for a pretty boring read. It mechanically checks for the things mentioned above and could be written in any language. If you want to have a look (or even contribute!), check out the repo. Talking to Github After the analysis code was done, I had a bot, running locally, waiting for incoming pull requests. But how could I talk to Github? I found out, that I should use the Status API and send a POST request to &#x2F;repos&#x2F;mre&#x2F;awesome-static-analysis&#x2F;statuses&#x2F;:sha (:sha is the commit ID that points to the HEAD of the pull request): &amp;quot;state&amp;quot;: &amp;quot;success&amp;quot;, &amp;quot;description&amp;quot;: &amp;quot;The build succeeded!&amp;quot; I could have used one of the existing Rust Github clients, but I decided to write a simple function to update the pull request status code. fn set_status(status: Status, desc: String, repo: &amp;amp;str, sha: &amp;amp;str) -&amp;gt; Result&amp;lt;reqwest::Response&amp;gt; let token = env::var(&amp;quot;GITHUB_TOKEN&amp;quot;)?; let client = reqwest::Client::new(); let mut params = HashMap::new(); params.insert(&amp;quot;state&amp;quot;, format!(&amp;quot; &amp;quot;, status)); params.insert(&amp;quot;description&amp;quot;, desc); println!(&amp;quot;Sending status: :#? &amp;quot;, params); let status_url = format!(&amp;quot;https:&#x2F;&#x2F;api.github.com&#x2F;repos&#x2F; &#x2F;statuses&#x2F; &amp;quot;, repo, sha); println!(&amp;quot;Status url: &amp;quot;, status_url); Ok(client .request( reqwest::Method::Post, &amp;amp;format!( &amp;quot; ?access_token= &amp;quot;, status_url, token, ), ) .json(&amp;amp;params) .send()?) 
You can see that I pass in a Github token from the environment and then I send the JSON payload as a post request using the reqwest library. That turned out to become a problem in the end: while afterparty was using version 0.9 of hyper, reqwest was using 0.11. Unfortunately, these two versions depend on a different build of the openssl-sys bindings. That’s a well-known problem and the only way to fix it, is to resolve the conflict. I was stuck for a while, but then I saw, that there was an open pull request to upgrade afterparty to hyper 0.10. So inside my Cargo.toml, I locked the version of afterparty to the version of the pull request: [dependencies] afterparty = git = &amp;quot;https:&#x2F;&#x2F;github.com&#x2F;ms705&#x2F;afterparty&amp;quot; This fixed the build, and I could finally move on. Deployment I needed a place to host the bot. Preferably for free, as it was a non-profit Open Source project. Also, the provider would have to run binaries. For quite some time, I was following a product named zeit. It runs any Docker container using an intuitive command line interface called now. I fell in love the first time I saw their demo on the site, so I wanted to give it a try. So I added a multi-stage Dockerfile to my project: FROM rust as builder COPY . &#x2F;usr&#x2F;src&#x2F;app WORKDIR &#x2F;usr&#x2F;src&#x2F;app RUN cargo build --release FROM debian:stretch RUN apt update &amp;amp;&amp;amp; apt install -y libssl1.1 ca-certificates &amp;amp;&amp;amp; apt clean -y &amp;amp;&amp;amp; apt autoclean -y &amp;amp;&amp;amp; apt autoremove -y COPY --from=builder target&#x2F;release&#x2F;check . EXPOSE 4567 ENTRYPOINT [&amp;quot;.&#x2F;check&amp;quot;] CMD [&amp;quot;--help&amp;quot;] The first part would build a static binary, the second part would run it at container startup. Well, that didn’t work, because zeit does not support multi-stage builds yet. The workaround was to split up the Dockerfile into two and connect them both with a Makefile. 
Makefiles are pretty powerful, you know? With that, I had all the parts for deployment together. # Build Rust binary for Linux docker run --rm -v $(CURDIR):&#x2F;usr&#x2F;src&#x2F;ci -w &#x2F;usr&#x2F;src&#x2F;ci rust cargo build --release # Deploy Docker images built from the local Dockerfile now deploy --force --public -e GITHUB_TOKEN=$ GITHUB_TOKEN # Set domain name of new build to `check.now.sh` # (The deployment URL was copied to the clipboard and is retrieved with pbpaste on macOS) now alias `pbpaste` check.now.sh Here’s the output of the deploy using now: &amp;gt; Deploying ~&#x2F;Code&#x2F;private&#x2F;awesome-static-analysis-ci&#x2F;deploy &amp;gt; Ready! https:&#x2F;&#x2F;deploy-sjbiykfvtx.now.sh (copied to clipboard) [2s] &amp;gt; Initializing… &amp;gt; Initializing… &amp;gt; Building &amp;gt; ▲ docker build Sending build context to Docker daemon 2.048 kBkB &amp;gt; Step 1 : FROM mre0&#x2F;ci:latest &amp;gt; latest: Pulling from mre0&#x2F;ci &amp;gt; ... &amp;gt; Digest: sha256:5ad07c12184755b84ca1b587e91b97c30f7d547e76628645a2c23dc1d9d3fd4b &amp;gt; Status: Downloaded newer image for mre0&#x2F;ci:latest &amp;gt; ---&amp;gt; 8ee1b20de28b &amp;gt; Successfully built 8ee1b20de28b &amp;gt; ▲ Storing image &amp;gt; ▲ Deploying image &amp;gt; ▲ Container started &amp;gt; listening on 0.0.0.0:4567 &amp;gt; Deployment complete! The last step was to add check.now.sh as a webhook inside the awesome-static-analysis project settings. Now, whenever a new pull request is coming in, you see that little bot getting active! Outcome and future plans I am very pleased with my choice of tools: afterparty saved me from a lot of manual work, while zeit made deployment really easy. It feels like Amazon Lambda on steroids. If you look at the code and the commits for my bot, you can see all my little missteps, until I got everything just right. Turns out, parsing human-readable text is tedious. 
Therefore I was thinking about turning the list of analysis tools into a structured format like YAML. This would greatly simplify the parsing and have the added benefit of having a machine-readable list of tools that can be used for other projects. Update May 2018 While attending the WeAreDevelopers conference in Vienna (can recommend that), I moved the CI pipeline from zeit.co to Travis CI. The reason was, that I wanted the linting code next to the project, which greatly simplified things. First and foremost I don’t need the web request handling code anymore, because travis takes care of that. If you like, you can compare the old and the new version. " }, { "title": "Modern Day Annoyances - Digital Clocks", "url": "https://endler.dev/2017/digitial-clocks/", - "body": "This morning I woke up to the beeping noise of our oven's alarm clock.\nThe reason was that I tried to correct the oven's local time the day before — and I pushed the wrong buttons.\nAs a result I didn't set the correct time, instead, I set a cooking timer... and that's what woke me up today.\n\n\n \n\nLet's add a clock to the microwave!\nOn occasions like these, I wonder why there's a digital clock on every single household device these days.\nThey're integrated into microwaves, fridges, ovens, dishwashers, dryers, mixers — and that's just the kitchen!\nThere is an inflation of digital clocks on modern-day devices.\nA lot of times I was wondering why that is the case. 
Here's my best guess:\nIt's easier to add a useless digital clock to the design than to leave it out.\nSay you are the engineer responsible for the control panel of a run-of-the-mill microwave.\nThe microwave chip comes with a digital timer, which is perfect for showing the remaining time until the food is warmed up.\nNow the question is, what will the timer show when you don't want to heat anything?\nWell, why not show the current time?\nIt's unobtrusive and adds value.\nExcept that these digital clocks can be quite annoying:\n\nThey run out of sync and show the wrong time.\nThey get reset when being plugged off or there's a power outage. (That's the dreaded, blinking 00:00 we all learned to love.)\nThey don't automatically switch between summer and winter time (hey Germany!).\n\nThat's why I constantly need to look after those clocks.\nLet me tell you a secret:\nWhen I'm not warming stuff in the oven, I don't want it to tell me the local time. I want the stove to be off.\nWhy I have trouble setting the clock on our oven\nOur oven has three buttons related to time: plus, minus and a clock symbol.\nTo set the time, you push the clock symbol. An arrow appears and the display changes to 00:00. You press time again and another arrow appears.\nPressing it two more times shows a blinking clock symbol. Then you can use the + and - buttons to adjust the time. 
After that, you wait to confirm.\nEasy!\nThe problem is, there is no immediate relationship between the controls and the result in the world.\nThe underlying concept is called mapping and is prevalent in interface design.\nTo add some functionality to a device you have two options:\n\nAdd more buttons.\nTeach an existing button a new trick.\n\nOption 1 might dilute your beautiful design, while option 2 might mean frustration for your users.\nNeither option is appealing.\nOur oven maps multiple functions to the same button.\nBut the most annoying thing is, that each device has a different mapping.\nLearning to set the time on my oven won't help me with the dishwasher, which sports an entirely different interface!\nTakeaways\nGood industrial designs are few and far between.\nA clock on your product will most likely not add any additional value.\nIn the best case it might be an annoyance, in the worst case it's harmfully misleading.\nWhen given a choice, I prefer home appliances without clocks.\nLooking at today's market, that's harder than it sounds.\nArguably, a device with a clock is cheaper than one without; just because the ones with timers get produced more often.\nNow I can understand why it took Steve Jobs two weeks to decide on a washing machine:\n\nWe spent some time in our family talking about what's the trade-off we want to make.\nWe spent about two weeks talking about this. Every night at the dinner table.\n\nHe chose a Miele Washing machine in the end - without a digital clock, I assume.\n" + "body": "This morning I woke up to the beeping noise of our oven’s alarm clock. The reason was that I tried to correct the oven’s local time the day before — and I pushed the wrong buttons. As a result I didn’t set the correct time, instead, I set a cooking timer… and that’s what woke me up today. Let’s add a clock to the microwave! On occasions like these, I wonder why there’s a digital clock on every single household device these days. 
They’re integrated into microwaves, fridges, ovens, dishwashers, dryers, mixers — and that’s just the kitchen! There is an inflation of digital clocks on modern-day devices. A lot of times I was wondering why that is the case. Here’s my best guess: It’s easier to add a useless digital clock to the design than to leave it out. Say you are the engineer responsible for the control panel of a run-of-the-mill microwave. The microwave chip comes with a digital timer, which is perfect for showing the remaining time until the food is warmed up. Now the question is, what will the timer show when you don’t want to heat anything? Well, why not show the current time? It’s unobtrusive and adds value. Except that these digital clocks can be quite annoying: They run out of sync and show the wrong time. They get reset when being plugged off or there’s a power outage. (That’s the dreaded, blinking 00:00 we all learned to love.) They don’t automatically switch between summer and winter time (hey Germany!). That’s why I constantly need to look after those clocks. Let me tell you a secret: When I’m not warming stuff in the oven, I don’t want it to tell me the local time. I want the stove to be off. Why I have trouble setting the clock on our oven Our oven has three buttons related to time: plus, minus and a clock symbol. To set the time, you push the clock symbol. An arrow appears and the display changes to 00:00. You press time again and another arrow appears. Pressing it two more times shows a blinking clock symbol. Then you can use the + and - buttons to adjust the time. After that, you wait to confirm. Easy! The problem is, there is no immediate relationship between the controls and the result in the world. The underlying concept is called mapping and is prevalent in interface design. To add some functionality to a device you have two options: Add more buttons. Teach an existing button a new trick. 
Option 1 might dilute your beautiful design, while option 2 might mean frustration for your users. Neither option is appealing. Our oven maps multiple functions to the same button. But the most annoying thing is, that each device has a different mapping. Learning to set the time on my oven won’t help me with the dishwasher, which sports an entirely different interface! Takeaways Good industrial designs are few and far between. A clock on your product will most likely not add any additional value. In the best case it might be an annoyance, in the worst case it’s harmfully misleading. When given a choice, I prefer home appliances without clocks. Looking at today’s market, that’s harder than it sounds. Arguably, a device with a clock is cheaper than one without; just because the ones with timers get produced more often. Now I can understand why it took Steve Jobs two weeks to decide on a washing machine: We spent some time in our family talking about what’s the trade-off we want to make. We spent about two weeks talking about this. Every night at the dinner table. He chose a Miele Washing machine in the end - without a digital clock, I assume. " }, { "title": "Learn Some Rust During Hacktoberfest", "url": "https://endler.dev/2017/hacktoberfest/", - "body": "\n \n \n Dirndl, Lederhose, Brezn, Beer, Rust\n \n Designed by Freepik\n \n\nOctober is the perfect time to contribute to Open Source — at least according to Github and DigitalOcean.\nBecause that's when they organize Hacktoberfest, a global event where you get a free shirt and lots of street cred for creating pull requests. Read the official announcement here.\nSome people think they cannot contribute anything of value. Either because they lack the programming skills or because they don't know where to start.\nThis guide is trying to change that!\nLet me show you, how everybody can contribute code to Rust, a safe systems programming language.\nI was inspired to write this by a tweet from llogiq.\n1. 
Find a great Rust project to work on\nWe all want our work to be appreciated.\nTherefore I suggest to start contributing to medium-sized projects, because they gained some momentum but are still driven by a small number of maintainers, so help is always welcome. By contrast, tiny projects are mostly useful to the original author only, while large projects can be intimidating at first and have stricter guidelines.\nFor now, let's look at repositories with 5-100 stars, which were updated within this year.\nGithub supports advanced search options based on Lucene syntax. \n\nlanguage:Rust stars:5..100 pushed:>2017-01-01\n\nHere's a list of projects, which match this filter.\n2. Install the Rust toolchain\nTo start contributing, we need a working Rust compiler and the cargo package manager.\nFortunately, the installation should be straightforward.\nI recommend rustup for that.\nRun the following command in your terminal, then follow the onscreen instructions.\n\ncurl https://sh.rustup.rs -sSf | sh\n\nIf you're unsure, just accept the defaults.\nAfter the installation is done, we also need to get the nightly version of the compiler for later.\n\nrustup install nightly\n\nQuestions so far? Find more detailed installation instructions here.\n3. Fork the project and clone it to your computer\nFirst, click on the little fork button on the top right of the Github project page. Then clone your fork to your computer. \n\ngit clone git@github.com:yourusername/project.git\n\nFor more detailed instructions, go here.\n4. Does it build?\nBefore we start modifying the codebase, we should make sure that it is in a workable state.\nThe following commands should work right away from inside the project folder.\n\ncargo build\ncargo test\n\nIf not, you might want to consult the README for further instructions. (But feel free to choose another project.)\n5. 
The magic sauce\nHere's the trick: we use a linter called clippy to show us improvement areas in any Rust codebase.\nTo get clippy, install it like so:\n\ncargo +nightly install clippy\n\nAfterwards, run it from the project root as often as you like.\n\nrustup run nightly cargo clippy\n\nThis should give you actionable information on how to improve the codebase.\nHere's some sample output:\n\nwarning: useless use of `format!`\n --> src/mach/header.rs:420:49\n |\n420 | let error = error::Error::Malformed(format!("bytes size is smaller than an Mach-o header"));\n | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n |\n = note: #[warn(useless_format)] on by default\n = help: for further information visit https://rust-lang-nursery.github.io/rust-clippy/v0.0.165/index.html#useless_format\n\nwarning: this expression borrows a reference that is immediately dereferenced by the compiler\n --> src/mach/header.rs:423:36\n |\n423 | let magic = mach::peek(&bytes, 0)?;\n | ^^^^^^ help: change this to: `bytes`\n |\n = help: for further information visit https://rust-lang-nursery.github.io/rust-clippy/v0.0.165/index.html#needless_borrow\n\nJust try some of the suggestions and see if the project still compiles and the tests still pass.\nCheck out the links to the documentation in the help section to learn more.\nStart small to make your changes easier to review.\n6. Creating a Pull Request\nIf you're happy with your changes, now is the time to publish them!\nIt's best to create a new branch for your changes and then push it to your fork.\n\ngit checkout -b codestyle\ngit commit -am "Minor codestyle fixes"\ngit push --set-upstream origin codestyle\n\nAfterwards, go to the homepage of your fork on Github.\nThere should be a button titled Compare & pull request.\nPlease add a meaningful description and then submit the pull request.\nCongratulations! You've contributed to the Rust ecosystem. Thank you! 
🎉\nTrophy case\n\nm4b/goblin\nfitzgen/cpp_demangle\nfdehau/tui-rs\nchristophertrml/rs-natural\n\nBonus!\nIf all of the manual fixing and checking sounds too dull, you can automate step number 5 using rustfix by Pascal Hertleif (@killercup):\n\nrustfix --yolo && cargo check\n" + "body": " Dirndl, Lederhose, Brezn, Beer, Rust Source: Designed by Freepik October is the perfect time to contribute to Open Source — at least according to Github and DigitalOcean. Because that’s when they organize Hacktoberfest, a global event where you get a free shirt and lots of street cred for creating pull requests. Read the official announcement here. Some people think they cannot contribute anything of value. Either because they lack the programming skills or because they don’t know where to start. This guide is trying to change that! Let me show you, how everybody can contribute code to Rust, a safe systems programming language. I was inspired to write this by a tweet from llogiq. 1. Find a great Rust project to work on We all want our work to be appreciated. Therefore I suggest to start contributing to medium-sized projects, because they gained some momentum but are still driven by a small number of maintainers, so help is always welcome. By contrast, tiny projects are mostly useful to the original author only, while large projects can be intimidating at first and have stricter guidelines. For now, let’s look at repositories with 5-100 stars, which were updated within this year. Github supports advanced search options based on Lucene syntax. language:Rust stars:5..100 pushed:&amp;gt;2017-01-01 Here’s a list of projects, which match this filter. 2. Install the Rust toolchain To start contributing, we need a working Rust compiler and the cargo package manager. Fortunately, the installation should be straightforward. I recommend rustup for that. Run the following command in your terminal, then follow the onscreen instructions. 
curl https:&#x2F;&#x2F;sh.rustup.rs -sSf | sh If you’re unsure, just accept the defaults. After the installation is done, we also need to get the nightly version of the compiler for later. rustup install nightly Questions so far? Find more detailed installation instructions here. 3. Fork the project and clone it to your computer First, click on the little fork button on the top right of the Github project page. Then clone your fork to your computer. git clone git@github.com:yourusername&#x2F;project.git For more detailed instructions, go here. 4. Does it build? Before we start modifying the codebase, we should make sure that it is in a workable state. The following commands should work right away from inside the project folder. cargo build cargo test If not, you might want to consult the README for further instructions. (But feel free to choose another project.) 5. The magic sauce Here’s the trick: we use a linter called clippy to show us improvement areas in any Rust codebase. To get clippy, install it like so: cargo +nightly install clippy Afterwards, run it from the project root as often as you like. rustup run nightly cargo clippy This should give you actionable information on how to improve the codebase. 
Here’s some sample output: warning: useless use of `format!` --&amp;gt; src&#x2F;mach&#x2F;header.rs:420:49 | 420 | let error = error::Error::Malformed(format!(&amp;quot;bytes size is smaller than an Mach-o header&amp;quot;)); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: #[warn(useless_format)] on by default = help: for further information visit https:&#x2F;&#x2F;rust-lang-nursery.github.io&#x2F;rust-clippy&#x2F;v0.0.165&#x2F;index.html#useless_format warning: this expression borrows a reference that is immediately dereferenced by the compiler --&amp;gt; src&#x2F;mach&#x2F;header.rs:423:36 | 423 | let magic = mach::peek(&amp;amp;bytes, 0)?; | ^^^^^^ help: change this to: `bytes` | = help: for further information visit https:&#x2F;&#x2F;rust-lang-nursery.github.io&#x2F;rust-clippy&#x2F;v0.0.165&#x2F;index.html#needless_borrow Just try some of the suggestions and see if the project still compiles and the tests still pass. Check out the links to the documentation in the help section to learn more. Start small to make your changes easier to review. 6. Creating a Pull Request If you’re happy with your changes, now is the time to publish them! It’s best to create a new branch for your changes and then push it to your fork. git checkout -b codestyle git commit -am &amp;quot;Minor codestyle fixes&amp;quot; git push --set-upstream origin codestyle Afterwards, go to the homepage of your fork on Github. There should be a button titled Compare &amp;amp; pull request. Please add a meaningful description and then submit the pull request. Congratulations! You’ve contributed to the Rust ecosystem. Thank you! 🎉 Trophy case m4b&#x2F;goblin fitzgen&#x2F;cpp_demangle fdehau&#x2F;tui-rs christophertrml&#x2F;rs-natural Bonus! 
If all of the manual fixing and checking sounds too dull, you can automate step number 5 using rustfix by Pascal Hertleif (@killercup): rustfix --yolo &amp;amp;&amp;amp; cargo check " }, { "title": "A Little Story About the `yes` Unix Command", "url": "https://endler.dev/2017/yes/", - "body": "What's the simplest Unix command you know?\nThere's echo, which prints a string to stdout and true, which always terminates with an exit code of 0.\nAmong the rows of simple Unix commands, there's also yes.\nIf you run it without arguments, you get an infinite stream of y's, separated by a newline:\n\ny\ny\ny\ny\n(...you get the idea)\n\nWhat seems to be pointless in the beginning turns out to be pretty helpful :\n\nyes | sh boring_installation.sh\n\nEver installed a program, which required you to type "y" and hit enter to keep going?\nyes to the rescue! It will carefully fulfill this duty, so you can keep watching Pootie Tang.\nWriting yes\nHere's a basic version in... uhm... BASIC.\n\n10 PRINT "y"\n20 GOTO 10\n\nAnd here's the same thing in Python:\n\nwhile True:\n print("y")\n\nSimple, eh? Not so quick!\nTurns out, that program is quite slow. \n\npython yes.py | pv -r > /dev/null\n[4.17MiB/s]\n\nCompare that with the built-in version on my Mac:\n\nyes | pv -r > /dev/null\n[34.2MiB/s]\n\nSo I tried to write a quicker version in Rust. Here's my first attempt:\n\nuse std::env;\n\nfn main() {\n let expletive = env::args().nth(1).unwrap_or("y".into());\n loop {\n println!("{}", expletive);\n }\n}\n\nSome explanations:\n\nThe string we want to print in a loop is the first command line parameter and is named expletive. I learned this word from the yes manpage.\nI use unwrap_or to get the expletive from the parameters. 
In case the parameter is not set, we use "y" as a default.\nThe default parameter gets converted from a string slice (&str) into an owned string on the heap (String) using into().\n\nLet's test it.\n\ncargo run --release | pv -r > /dev/null\n Compiling yes v0.1.0\n Finished release [optimized] target(s) in 1.0 secs\n Running `target/release/yes`\n[2.35MiB/s] \n\nWhoops, that doesn't look any better. It's even slower than the Python version!\nThat caught my attention, so I looked around for the source code of a C implementation.\nHere's the very first version of the program, released with Version 7 Unix and famously authored by Ken Thompson on Jan 10, 1979:\n\nmain(argc, argv)\nchar **argv;\n{\n for (;;)\n printf("%s\\n", argc>1? argv[1]: "y");\n}\n\nNo magic here.\nCompare that to the 128-line-version from the GNU coreutils, which is mirrored on Github. After 25 years, it is still under active development!\nThe last code change happened around a year ago.\nThat's quite fast:\n\n# brew install coreutils\ngyes | pv -r > /dev/null \n[854MiB/s]\n\nThe important part is at the end:\n\n/* Repeatedly output the buffer until there is a write error; then fail. */\nwhile (full_write (STDOUT_FILENO, buf, bufused) == bufused)\n continue;\n\nAha! So they simply use a buffer to make write operations faster.\nThe buffer size is defined by a constant named BUFSIZ, which gets chosen on each system so as to make I/O efficient (see here).\nOn my system, that was defined as 1024 bytes. 
I actually had better performance with 8192 bytes.\nI've extended my Rust program:\n\nuse std::env;\nuse std::io::{self, BufWriter, Write};\n\nconst BUFSIZE: usize = 8192;\n\nfn main() {\n let expletive = env::args().nth(1).unwrap_or("y".into());\n let mut writer = BufWriter::with_capacity(BUFSIZE, io::stdout());\n loop {\n writeln!(writer, "{}", expletive).unwrap();\n }\n}\n\nThe important part is, that the buffer size is a multiple of four, to ensure memory alignment.\nRunning that gave me 51.3MiB/s.\nFaster than the version, which comes with my system, but still way slower than the results from this Reddit post that I found, where the author talks about 10.2GiB/s.\nUpdate\nOnce again, the Rust community did not disappoint.\nAs soon as this post hit the Rust subreddit, user nwydo pointed out a previous discussion on the same topic.\nHere's their optimized code, that breaks the 3GB/s mark on my machine:\n\nuse std::env;\nuse std::io::{self, Write};\nuse std::process;\nuse std::borrow::Cow;\n\nuse std::ffi::OsString;\npub const BUFFER_CAPACITY: usize = 64 * 1024;\n\npub fn to_bytes(os_str: OsString) -> Vec<u8> {\n use std::os::unix::ffi::OsStringExt;\n os_str.into_vec()\n}\n\nfn fill_up_buffer<'a>(buffer: &'a mut [u8], output: &'a [u8]) -> &'a [u8] {\n if output.len() > buffer.len() / 2 {\n return output;\n }\n\n let mut buffer_size = output.len();\n buffer[..buffer_size].clone_from_slice(output);\n\n while buffer_size < buffer.len() / 2 {\n let (left, right) = buffer.split_at_mut(buffer_size);\n right[..buffer_size].clone_from_slice(left);\n buffer_size *= 2;\n }\n\n &buffer[..buffer_size]\n}\n\nfn write(output: &[u8]) {\n let stdout = io::stdout();\n let mut locked = stdout.lock();\n let mut buffer = [0u8; BUFFER_CAPACITY];\n\n let filled = fill_up_buffer(&mut buffer, output);\n while locked.write_all(filled).is_ok() {}\n}\n\nfn main() {\n write(&env::args_os().nth(1).map(to_bytes).map_or(\n Cow::Borrowed(\n &b"y\\n"[..],\n ),\n |mut arg| {\n arg.push(b'\\n');\n 
Cow::Owned(arg)\n },\n ));\n process::exit(1);\n}\n\nNow that's a whole different ballgame!\n\nWe prepare a filled string buffer, which will be reused for each loop.\nStdout is protected by a lock. So, instead of constantly acquiring and releasing it, we keep it all the time.\nWe use a the platform-native std::ffi::OsString and std::borrow::Cow to avoid unnecessary allocations.\n\nThe only thing that I could contribute was removing an unnecessary mut. 😅\nLessons learned\nThe trivial program yes turns out not to be so trivial after all.\nIt uses output buffering and memory alignment to improve performance.\nRe-implementing Unix tools is fun and makes me appreciate the nifty tricks,\nwhich make our computers fast.\n" + "body": "What’s the simplest Unix command you know? There’s echo, which prints a string to stdout and true, which always terminates with an exit code of 0. Among the series of simple Unix commands, there’s also yes. If you execute it without arguments, you get an infinite stream of y’s, separated by a newline: y y y y (...you get the idea) What seems to be pointless in the beginning turns out to be pretty helpful : yes | sh boring_installation.sh Ever installed a program, which required you to type “y” and hit enter to keep going? yes to the rescue! It will carefully fulfill its duty, so you can keep watching Pootie Tang. Writing yes Here’s a basic version in… uhm… BASIC. 10 PRINT &amp;quot;y&amp;quot; 20 GOTO 10 And here’s the same thing in Python: while True: print(&amp;quot;y&amp;quot;) Simple, eh? Not so quick! Turns out, that program is quite slow. python yes.py | pv -r &amp;gt; &#x2F;dev&#x2F;null [4.17MiB&#x2F;s] Compare that with the built-in version on my Mac: yes | pv -r &amp;gt; &#x2F;dev&#x2F;null [34.2MiB&#x2F;s] So I tried to write a quicker version in Rust. 
Here’s my first attempt: use std::env; fn main() let expletive = env::args().nth(1).unwrap_or(&amp;quot;y&amp;quot;.into()); loop println!(&amp;quot; &amp;quot;, expletive); Some explanations: The string we want to print in a loop is the first command line parameter and is named expletive. I learned this word from the yes manpage. I use unwrap_or to get the expletive from the parameters. In case the parameter is not set, we use “y” as a default. The default parameter gets converted from a string slice (&amp;amp;str) into an owned string on the heap (String) using into(). Let’s test it. cargo run --release | pv -r &amp;gt; &#x2F;dev&#x2F;null Compiling yes v0.1.0 Finished release [optimized] target(s) in 1.0 secs Running `target&#x2F;release&#x2F;yes` [2.35MiB&#x2F;s] Whoops, that doesn’t look any better. It’s even slower than the Python version! That caught my attention, so I looked around for the source code of a C implementation. Here’s the very first version of the program, released with Version 7 Unix and famously authored by Ken Thompson on Jan 10, 1979: main(argc, argv) char **argv; for (;;) printf(&amp;quot;%sn&amp;quot;, argc&amp;gt;1? argv[1]: &amp;quot;y&amp;quot;); No magic here. Compare that to the 128-line-version from the GNU coreutils, which is mirrored on Github. After 25 years, it is still under active development! The last code change happened around a year ago. That’s quite fast: # brew install coreutils gyes | pv -r &amp;gt; &#x2F;dev&#x2F;null [854MiB&#x2F;s] The important part is at the end: &#x2F;* Repeatedly output the buffer until there is a write error; then fail. *&#x2F; while (full_write (STDOUT_FILENO, buf, bufused) == bufused) continue; Aha! So they simply use a buffer to make write operations faster. The buffer size is defined by a constant named BUFSIZ, which gets chosen on each system so as to make I&#x2F;O efficient (see here). On my system, that was defined as 1024 bytes. I actually had better performance with 8192 bytes. 
I’ve extended my Rust program: use std::env; use std::io:: self, BufWriter, Write ; const BUFSIZE: usize = 8192; fn main() let expletive = env::args().nth(1).unwrap_or(&amp;quot;y&amp;quot;.into()); let mut writer = BufWriter::with_capacity(BUFSIZE, io::stdout()); loop writeln!(writer, &amp;quot; &amp;quot;, expletive).unwrap(); The important part is, that the buffer size is a multiple of four, to ensure memory alignment. Running that gave me 51.3MiB&#x2F;s. Faster than the version, which comes with my system, but still way slower than the results from this Reddit post that I found, where the author talks about 10.2GiB&#x2F;s. Update Once again, the Rust community did not disappoint. As soon as this post hit the Rust subreddit, user nwydo pointed out a previous discussion on the same topic. Here’s their optimized code, that breaks the 3GB&#x2F;s mark on my machine: use std::env; use std::io:: self, Write ; use std::process; use std::borrow::Cow; use std::ffi::OsString; pub const BUFFER_CAPACITY: usize = 64 * 1024; pub fn to_bytes(os_str: OsString) -&amp;gt; Vec&amp;lt;u8&amp;gt; use std::os::unix::ffi::OsStringExt; os_str.into_vec() fn fill_up_buffer&amp;lt;&amp;#39;a&amp;gt;(buffer: &amp;amp;&amp;#39;a mut [u8], output: &amp;amp;&amp;#39;a [u8]) -&amp;gt; &amp;amp;&amp;#39;a [u8] if output.len() &amp;gt; buffer.len() &#x2F; 2 return output; let mut buffer_size = output.len(); buffer[..buffer_size].clone_from_slice(output); while buffer_size &amp;lt; buffer.len() &#x2F; 2 let (left, right) = buffer.split_at_mut(buffer_size); right[..buffer_size].clone_from_slice(left); buffer_size *= 2; &amp;amp;buffer[..buffer_size] fn write(output: &amp;amp;[u8]) let stdout = io::stdout(); let mut locked = stdout.lock(); let mut buffer = [0u8; BUFFER_CAPACITY]; let filled = fill_up_buffer(&amp;amp;mut buffer, output); while locked.write_all(filled).is_ok() fn main() write(&amp;amp;env::args_os().nth(1).map(to_bytes).map_or( Cow::Borrowed( &amp;amp;b&amp;quot;yn&amp;quot;[..], ), 
|mut arg| arg.push(b&amp;#39;n&amp;#39;); Cow::Owned(arg) , )); process::exit(1); Now that’s a whole different ballgame! We prepare a filled string buffer, which will be reused for each loop. Stdout is protected by a lock. So, instead of constantly acquiring and releasing it, we keep it all the time. We use a the platform-native std::ffi::OsString and std::borrow::Cow to avoid unnecessary allocations. The only thing that I could contribute was removing an unnecessary mut. 😅 Lessons learned The trivial program yes turns out not to be so trivial after all. It uses output buffering and memory alignment to improve performance. Re-implementing Unix tools is fun and makes me appreciate the nifty tricks, which make our computers fast. " }, { "title": "Lightning Fast Image Previews with Pure CSS and LQIP", "url": "https://endler.dev/2017/image-previews/", - "body": "\n \n \n \n Adapted from Freepik\n \n\nMy website is reasonably fast.\nI hope that every page load feels snappy, no matter your device or location.\nThat should not come as a surprise. After all, I'm just using plain HTML and CSS.\nJavaScript is avoided whenever possible.\nThere was one thing left, which really annoyed me: layout reflow after images got loaded.\nThe problem is, that the image dimensions are not known when the text is ready to be displayed.\nAs a result, the text will be pushed down on the screen as soon as an image is loaded above.\nAlso, while an image is loading, there is no preview, just blank space.\nHere's what that looks like on a slower connection:\n\nI could fix that, by hardcoding the image width and height, but that would be tedious and error-prone.\nAnd there would be no preview.\nSo I was wondering, what others were doing. 🤔\nTiny image thumbnails\nI vaguely remembered, that Facebook uses tiny preview thumbnails in their mobile app.\nThey extract the quantization table from the JPEG header to render the preview. 
This information \nis stored on the client, so it doesn't need to be downloaded every time.\nUnfortunately, this approach requires full control over the image encoder.\nIt works for apps, but hardly for websites.\nThe search continued.\nUntil my colleague Tobias Baldauf introduced me to LQIP (Low-Quality Image Placeholders).\nHere's the idea:\n\nLoad the page including inlined, low-quality image thumbnails.\nOnce the page is fully loaded (e.g. when the onload event is fired), lazy load full quality images.\n\nUnfortunately, this technique requires JavaScript.\nNevertheless, I liked the idea, so I started experimenting with different image sizes and formats. My goal was to create the smallest thumbnails using any standard image format.\nBenchmark\nHere are 15 pixel wide thumbnails encoded in different file formats:\n\nI used different tools to create the thumbnails.\nFor JPEG and PNG encoding, I used svgexport.\n\nsvgexport img.svg img.png "svg{background:white;}" 15: 1%\n\nFor webp, I used cwebp:\n\ncwebp img.png -o img.webp\n\nThe gif was converted using an online tool and optimized using gifsicle:\n\ngifsicle -O3 < img.gif > img_mini.gif\nComparison\nWebP is the smallest, but it's not supported by all browsers.\nGif was second, but when resizing the image and applying the blur filter, I was not happy with the result.\nIn the end, I settled for PNG, which provided an excellent tradeoff between size and quality.\nI optimized the images even further using oxipng, which supports zopfli compression.\nWith that, I end up with thumbnails of around 300-400 bytes in size.\nI integrated the thumbnail creation process into my build toolchain for the blog.\nThe actual code to create the images is rather boring.\nIf you really want to have a look, it's on Github.\nAvoiding JavaScript\nHere is the skeleton HTML for the image previews:\n\n<figure>\n <div class="loader">\n <object data="image.svg" type="image/svg+xml"></object>\n <img class="frozen" 
src="data:image/png;base64,..." />\n </div>\n</figure>\n\nThe trick is to wrap both the full-size image and the preview image into a loader div,\nwhich gets a width: auto CSS attribute:\n\n.loader {\n position:relative;\n overflow: hidden;\n width: auto;\n}\n\nI wrap the SVG into an object tag instead of using an img element.\nThis has the benefit, that I can show a placeholder in case the SVG can't be loaded.\nI position the object at the top left of the loader div.\n\n.loader object {\n position: absolute;\n}\n\n.loader img, .loader object {\n display: block;\n top: 0;\n left: 0;\n width: 100%;\n}\n\nHere's the placeholder hack including some references:\n\n// https://stackoverflow.com/a/29111371/270334\n// https://stackoverflow.com/a/32928240/270334\nobject {\n position: relative;\n float: left;\n display: block;\n \n &::after {\n position: absolute;\n top: 0;\n left: 0;\n display: block;\n width: 1000px;\n height: 1000px;\n content: '';\n background: #efefef;\n }\n}\n\nThe last part is the handling of the thumbnails.\nLike most other sites, I decided to apply a blur filter.\nIn a way, it looks like the image is frozen, so that's what I called the CSS selector.\nI also applied a scaling transformation to achieve sharp borders.\n\n.frozen {\n -webkit-filter: blur(8px);\n -moz-filter: blur(8px);\n -o-filter: blur(8px);\n -ms-filter: blur(8px);\n filter: blur(8px);\n transform: scale(1.04);\n animation: 0.2s ease-in 0.4s 1 forwards fade;\n width: 100%;\n}\n\n@keyframes fade {\n 0% {\n opacity:1;\n }\n 100% {\n opacity:0;\n }\n}\n\nI use CSS animations instead of JavaScript.\nThe duration of the animation is based on the 95% percentile load time of all visitors of the page. 
Although it's just an approximation, this should work for most readers.\nResult\n\nNo JavaScript needed\nWorks on all modern browsers\nSupports a fallback in case the main image can't be loaded\nTiny overhead\n\nResources\n\nIntroducing LQIP – Low Quality Image Placeholders\nHow Medium does progressive image loading\nSQIP, a new preview technique using pure SVG\n\n" + "body": " Source: Adapted from Freepik My website is reasonably fast. I hope that every page load feels snappy, no matter your device or location. That should not come as a surprise. After all, I’m just using plain HTML and CSS. JavaScript is avoided whenever possible. There was one thing left, which really annoyed me: layout reflow after images got loaded. The problem is, that the image dimensions are not known when the text is ready to be displayed. As a result, the text will be pushed down on the screen as soon as an image is loaded above. Also, while an image is loading, there is no preview, just blank space. Here’s what that looks like on a slower connection: Illustration of a flash of unstyled content I could fix that, by hardcoding the image width and height, but that would be tedious and error-prone. And there would be no preview. So I was wondering, what others were doing. 🤔 Tiny image thumbnails I vaguely remembered, that Facebook uses tiny preview thumbnails in their mobile app. They extract the quantization table from the JPEG header to render the preview. This information is stored on the client, so it doesn’t need to be downloaded every time. Unfortunately, this approach requires full control over the image encoder. It works for apps, but hardly for websites. The search continued. Until my colleague Tobias Baldauf introduced me to LQIP (Low-Quality Image Placeholders). Here’s the idea: Load the page including inlined, low-quality image thumbnails. Once the page is fully loaded (e.g. when the onload event is fired), lazy load full quality images. 
Unfortunately, this technique requires JavaScript. Nevertheless, I liked the idea, so I started experimenting with different image sizes and formats. My goal was to create the smallest thumbnails using any standard image format. Benchmark Here are 15 pixel wide thumbnails encoded in different file formats: Comparison of different image formats when creating thumbnails I used different tools to create the thumbnails. For JPEG and PNG encoding, I used svgexport. svgexport img.svg img.png &amp;quot;svg background:white; &amp;quot; 15: 1% For webp, I used cwebp: cwebp img.png -o img.webp The gif was converted using an online tool and optimized using gifsicle: gifsicle -O3 &amp;lt; img.gif &amp;gt; img_mini.gif Comparison WebP is the smallest, but it’s not supported by all browsers. Gif was second, but when resizing the image and applying the blur filter, I was not happy with the result. In the end, I settled for PNG, which provided an excellent tradeoff between size and quality. I optimized the images even further using oxipng, which supports zopfli compression. With that, I end up with thumbnails of around 300-400 bytes in size. I integrated the thumbnail creation process into my build toolchain for the blog. The actual code to create the images is rather boring. If you really want to have a look, it’s on Github. 
Avoiding JavaScript Here is the skeleton HTML for the image previews: &amp;lt;figure&amp;gt; &amp;lt;div class=&amp;quot;loader&amp;quot;&amp;gt; &amp;lt;object data=&amp;quot;image.svg&amp;quot; type=&amp;quot;image&#x2F;svg+xml&amp;quot;&amp;gt;&amp;lt;&#x2F;object&amp;gt; &amp;lt;img class=&amp;quot;frozen&amp;quot; src=&amp;quot;data:image&#x2F;png;base64,...&amp;quot; &#x2F;&amp;gt; &amp;lt;&#x2F;div&amp;gt; &amp;lt;&#x2F;figure&amp;gt; The trick is to wrap both the full-size image and the preview image into a loader div, which gets a width: auto CSS attribute: .loader position: relative; overflow: hidden; width: auto; I wrap the SVG into an object tag instead of using an img element. This has the benefit, that I can show a placeholder in case the SVG can’t be loaded. I position the object at the top left of the loader div. .loader object position: absolute; .loader img, .loader object display: block; top: 0; left: 0; width: 100%; Here’s the placeholder hack including some references: &#x2F;* https:&#x2F;&#x2F;stackoverflow.com&#x2F;a&#x2F;29111371&#x2F;270334 *&#x2F; &#x2F;* https:&#x2F;&#x2F;stackoverflow.com&#x2F;a&#x2F;32928240&#x2F;270334 *&#x2F; object position: relative; float: left; display: block; &amp;amp;::after position: absolute; top: 0; left: 0; display: block; width: 1000px; height: 1000px; content: &amp;quot;&amp;quot;; background: #efefef; The last part is the handling of the thumbnails. Like most other sites, I decided to apply a blur filter. In a way, it looks like the image is frozen, so that’s what I called the CSS selector. I also applied a scaling transformation to achieve sharp borders. .frozen -webkit-filter: blur(8px); -moz-filter: blur(8px); -o-filter: blur(8px); -ms-filter: blur(8px); filter: blur(8px); transform: scale(1.04); animation: 0.2s ease-in 0.4s 1 forwards fade; width: 100%; @keyframes fade 0% opacity: 1; 100% opacity: 0; I use CSS animations instead of JavaScript. 
The duration of the animation is based on the 95% percentile load time of all visitors of the page. Although it’s just an approximation, this should work for most readers. Result No JavaScript needed Works on all modern browsers Supports a fallback in case the main image can’t be loaded Tiny overhead Resources Introducing LQIP – Low Quality Image Placeholders How Medium does progressive image loading SQIP, a new preview technique using pure SVG " }, { "title": "Go vs Rust? Choose Go.", "url": "https://endler.dev/2017/go-vs-rust/", - "body": "\n \n \n \n Gopher designed with Gopherize.me. Cogwheels designed by Freepik\n \n\n"Rust or Go, which one should I choose?" is a question I get quite often.\nBoth languages seem to be competing for the same user base and they both seem to be\nsystems programming languages, so there must be a clear winner, right?\ntl;dr: It's not so easy. Both languages have a different scope. Golang shines for writing microservices and for typical "DevOps" tasks, but it is not a systems programming language. Rust is stronger for tasks where concurrency, safety and/or performance are important; but it has a steeper learning curve than Go.\nGo: practical, pragmatic, plain\n\nI don't think Go is an elegant language. Its biggest feature is simplicity.\nGo is not even a systems programming language. While it's great for writing microservices and tooling around backend infrastructure, I would not want to write a kernel or a memory allocator with it.\nBut with Go, you get things done — fast.\nGo is one of the most productive languages I've ever worked with.\nThe mantra is: solve real problems today. \nRust's strong guarantees come at a cost\n\nRust in comparison is hard. 
It took me many months to become somewhat productive.\nYou need to invest a serious amount of time to see any benefit.\nRust is already a powerful language and it gets stronger every day.\nIt feels much more like a pragmatic Haskell to me than a safer C.\nDon't get me wrong: I love Rust, and it helped me become a better programmer. It is certainly a nice language to learn. The big question is, if it is the right choice for your next major project.\nHere's the thing: if you choose Rust, usually you need the guarantees, that the language provides:\n\nSafety against Null pointers, race conditions and all sorts of low-level threats.\nPredictable runtime behavior (zero cost abstractions and no garbage collector).\n(Almost) total control over the hardware (memory layout, processor features).\nSeamless interoperability with other languages.\n\nIf you don't require any of these features, Rust might be a poor choice for your next project.\nThat's because these guarantees come with a cost: ramp-up time.\nYou'll need to unlearn bad habits and learn new concepts.\nChances are, you will fight with the borrow checker a lot when you start out.\nCase-study: Primality by trial division\nLet's say, you want to check if a number is prime.\nThe easiest way is to check if we can divide the number by any smaller natural number (without a remainder). If not, we found a prime number! 
This approach is called trial division.\nHere's how to do that in Golang (courtesy of Rosetta Code):\n\nfunc IsPrime(n int) bool {\n\tif n < 0 {\n\t\tn = -n\n\t}\n\tswitch {\n\tcase n < 2:\n\t\treturn false\n\tdefault:\n\t\tfor i := 2; i < n; i++ {\n\t\t\tif n%i == 0 {\n\t\t\t\treturn false\n\t\t\t}\n\t\t}\n\t}\n\treturn true\n}\n\nAnd here's the same thing in Rust:\n\npub fn is_prime(n: u64) -> bool {\n match n {\n 0...1 => false,\n _ => {\n for d in 2..n {\n if n % d == 0 {\n return false;\n }\n }\n true\n }\n }\n}\n\nAt first sight, both solutions look pretty similar.\nBut if we look closer, we can spot some differences.\n\nIn Go, we use a simple switch-case statement. In Rust, we use a match statement, which is much more powerful.\nIn Go, we use a simple for-loop to iterate over the numbers 2 to n. In Rust, we use a range expression (2..n).\nIn Go, we use two return statements, in Rust we have one return expression. In general, most things in Rust are expressions, which can be returned and assigned to a variable. Read more about expressions here.\n\nIn many areas, Rust is more functional than Golang. You could rewrite the above code using the any method, which is implemented for Range.\n\nfn is_prime(n: u64) -> bool {\n match n {\n 0...1 => false,\n _ => !(2..n).any(|d| n % d == 0),\n }\n}\n\nIt might seem a little alien at first, but it will become second-nature after a while.\nThis was just a quick example, of course. 
I suggest, you browse some code on Rosetta Code to get a better feeling for both languages.\nCase study: Finding duplicate words in text files\nIf you're more like a visual type, here is a video where I write a simple\nconcurrent program in Go and Rust to compare both languages:\n\n \n \n\nSome things I prefer in Go\n\nFast compile times\nPragmatic problem-solving approach\nNice ecosystem for typical DevOps tasks\nBatteries-included standard-library\nIDE support\nSimple error handling\nThe mascot 😉\n\nSome things I prefer in Rust\n\nSafety: No null pointers, no data races,...\nFine-grained system control\nIncredible runtime speed (comparable with C/C++)\nZero-cost abstractions\nAwesome, open-minded community\nSimple package management with cargo\nSupport for Generics in form of traits\nC interop and FFI\n\nConclusion\n99% of the time, Go is "good enough" and that 1% where it isn't, you'll know.\nAnd then take a look at Rust, because the two languages complement each other pretty well.\nAfter all is said and done, Rust and Go are not really competitors.\n" + "body": " Source: Gopher designed with Gopherize.me. Gears designed by Freepik. I wrote this article a long time ago. In the meantime, my opinion on some aspects has changed. In order to give a more balanced perspective on the pros and cons, I suggest to read this comparison on Go vs Rust instead, which I wrote in collaboration with Shuttle 🚀 Rust vs Go: A Hands-On Comparison “Rust or Go, which one should I choose?” is a question I get quite often. Both languages seem to be competing for the same user base and they both seem to be systems programming languages, so there must be a clear winner, right? Go: practical, pragmatic, plain The Golang learning curve over time, a straight line. I don’t think Go is an elegant language. Its biggest feature is simplicity easiness. Go is not even a systems programming language. 
While it’s great for writing microservices and tooling around backend infrastructure, I would not want to write a kernel or a memory allocator with it. But with Go, you get things done — fast. Go is one of the most productive languages I’ve ever worked with. The mantra is: solve real problems today. Rust’s strong guarantees come at a cost The Rust learning curve over time, a bumpy ride. Rust in comparison is hard. It took me many months to become somewhat productive. You need to invest a serious amount of time to see any benefit. Rust is already a powerful language and it gets stronger every day. It feels much more like a pragmatic Haskell to me than a safer C. Don’t get me wrong: I love Rust, and it helped me become a better programmer. It is certainly a nice language to learn. The big question is, if it is the right choice for your next major project. Here’s the thing: if you choose Rust, usually you need the guarantees, that the language provides: Safety against Null pointers, race conditions and all sorts of low-level threats. Predictable runtime behavior (zero cost abstractions and no garbage collector). (Almost) total control over the hardware (memory layout, processor features). Seamless interoperability with other languages. If you don’t require any of these features, Rust might be a poor choice for your next project. That’s because these guarantees come with a cost: ramp-up time. You’ll need to unlearn bad habits and learn new concepts. Chances are, you will fight with the borrow checker a lot when you start out. Case-study: Primality by trial division Let’s say, you want to check if a number is prime. The easiest way is to check if we can divide the number by any smaller natural number (without a remainder). If not, we found a prime number! This approach is called trial division. 
Here’s how to do that in Golang (courtesy of Rosetta Code): func IsPrime(n int) bool if n &amp;lt; 0 n = -n switch case n &amp;lt; 2: return false default: for i := 2; i &amp;lt; n; i++ if n%i == 0 return false return true And here’s the same thing in Rust: pub fn is_prime(n: u64) -&amp;gt; bool match n 0...1 =&amp;gt; false, _ =&amp;gt; for d in 2..n if n % d == 0 return false; true At first sight, both solutions look pretty similar. But if we look closer, we can spot some differences. In Go, we use a simple switch-case statement. In Rust, we use a match statement, which is much more powerful. In Go, we use a simple for-loop to iterate over the numbers 2 to n. In Rust, we use a range expression (2..n). In Go, we use two return statements, in Rust we have one return expression. In general, most things in Rust are expressions, which can be returned and assigned to a variable. Read more about expressions here. In many areas, Rust is more functional than Golang. You could rewrite the above code using the any method, which is implemented for Range. fn is_prime(n: u64) -&amp;gt; bool match n 0...1 =&amp;gt; false, _ =&amp;gt; !(2..n).any(|d| n % d == 0), It might seem a little alien at first, but it will become second-nature after a while. This was just a quick example, of course. I suggest, you browse some code on Rosetta Code to get a better feeling for both languages. 
Case study: Finding duplicate words in text files If you’re more like a visual type, here is a video where I write a simple concurrent program in Go and Rust to compare both languages: document.addEventListener( DOMContentLoaded , function() lightEmbedInit(); ); Some things I prefer in Go Fast compile times Pragmatic problem-solving approach Nice ecosystem for typical DevOps tasks Batteries-included standard-library IDE support Simple error handling The mascot 😉 Some things I prefer in Rust Safety: No null pointers, no data races,… Fine-grained system control Incredible runtime speed (comparable with C&#x2F;C++) Zero-cost abstractions Awesome, open-minded community Simple package management with cargo Support for Generics in form of traits C interop and FFI Conclusion 99% of the time, Go is “good enough” and that 1% where it isn’t, you’ll know. And then take a look at Rust, because the two languages complement each other pretty well. If you’re interested in hands-on Rust consulting, pick a date from my calendar and we can talk about how I can help. After all is said and done, Rust and Go are not really competitors. " }, { "title": "Afraid of Makefiles? Don't be!", "url": "https://endler.dev/2017/makefiles/", - "body": "\n \n \n What do clothes have to do with Makefiles? Find out in this post.\n \n Illustration by Anindyanfitri - Freepik.com\n \n\nIn the last few years, I've had the pleasure to work with a lot of talented Software Engineers.\nOne thing that struck me is, that many of them did not have any working knowledge of Makefiles \nand why they are useful.\nWhen faced with the task to automate a build process, they often roll their own shell scripts.\nCommon culprits are called build.sh or run.sh or doall.sh etc.\nThey implement the same basic functionality over and over again:\n\nParsing input parameters and environment variables.\nManually managing dependencies between build steps.\nError handling... 
maybe.\n\nAlong the way, they keep making the same basic mistakes:\n\nIncorrectly handling input parameters and environment variables.\nMissing dependencies between build steps.\nForgetting to handle errors and — even worse — carrying on with the program execution.\n\nMakefiles are scary!\nIf you think that make is scary, you probably think of complicated build machinery for big software projects.\nIt doesn't need to be that way. Let's hear what the author of make, Stuart Feldman has to say:\n\nIt began with an elaborate idea of a dependency analyzer, boiled down to something much simpler, and turned into Make that weekend. Use of tools that were still wet was part of the culture. Makefiles were text files, not magically encoded binaries because that was the Unix ethos: printable, debuggable, understandable stuff.\n— The Art of Unix Programming (2003)\n\nMakefiles are simple!\nBefore I leave the house, I need to get dressed.\nI use the same simple routine every time:\nUnderpants, trousers, shirt, pullover, socks, shoes, jacket.\nMost likely you also have a routine, even though yours might be different.\nSome of these steps depend on each other.\nMake is useful for handling dependencies.\nLet's try to express my routine as a Makefile.\n\ndress: trousers shoes jacket\n\t@echo "All done. Let's go outside!"\n\njacket: pullover\n\t@echo "Putting on jacket."\n\npullover: shirt\n\t@echo "Putting on pullover."\n\nshirt:\n\t@echo "Putting on shirt."\n\ntrousers: underpants\n\t@echo "Putting on trousers."\n\nunderpants:\n\t@echo "Putting on underpants."\n\nshoes: socks\n\t@echo "Putting on shoes."\n\nsocks: pullover\n\t@echo "Putting on socks."\n\nIf we execute the Makefile, we get the following output:\n\n$ make dress\nPutting on underpants.\nPutting on trousers.\nPutting on shirt.\nPutting on pullover.\nPutting on socks.\nPutting on shoes.\nPutting on jacket.\nAll done. 
Let's go outside!\nWhat just happened?\nNoticed how the steps are in the correct order?\nBy plainly writing down the dependencies between the steps, make helps us to execute them correctly.\nEach build step has the following structure:\n\ntarget: [dependencies]\n\t<shell command to execute>\n\t<shell command to execute>\n\t...\n\n\n\nThe first target in a Makefile will be executed by default when we call make.\n\n\nThe order of the targets does not matter.\n\n\nShell commands must be indented with a tab.\n\n\nAdd an @ sign to suppress output of the command that is executed.\n\n\nIf target isn't a file you want to build, please add .PHONY <target> at the end of the build step.\nCommon phony targets are: clean, install, run,...\n\ninstall: \n\tnpm install\n.PHONY: install\n\nOtherwise, if somebody creates an install directory, make will silently fail, because the build target already exists.\n\n\nCongratulations! You've learned 90% of what you need to know about make.\nNext steps\nReal Makefiles can do much more! They will only build the files that have changed instead of doing a full rebuild.\nAnd they will do as much as possible in parallel.\n" + "body": " What do clothes have to do with Makefiles? Find out in this post! Source: Illustration by Anindyanfitri - Freepik.com In the last few years, I’ve had the pleasure to work with a lot of talented Software Engineers. One thing that struck me is that many of them did not have any working knowledge of Makefiles and why they are useful. When faced with the task to automate a build process, they often roll their own shell scripts. Common culprits are called build.sh, run.sh or doall.sh in a project folder. They implement the same basic functionality over and over again: Parsing input parameters and environment variables. Manually managing dependencies between build steps. Error handling (…maybe). Along the way, they keep making the same basic mistakes: Incorrectly handling input parameters and environment variables. 
Missing dependencies between build steps. Forgetting to handle errors and — even worse — carrying on with the program execution. These are issues Makefiles were invented to solve. Makefiles are scary! If you think that make is scary, you probably think of complicated build machinery for big software projects. It doesn’t need to be that way. Let’s hear from the author of make, Stuart Feldman himself: It began with an elaborate idea of a dependency analyzer, boiled down to something much simpler, and turned into Make that weekend. Use of tools that were still wet was part of the culture. Makefiles were text files, not magically encoded binaries because that was the Unix ethos: printable, debuggable, understandable stuff. — The Art of Unix Programming (2003) Make was built in one weekend to solve a reoccuring problem in a simple way. Makefiles are simple! Before I leave the house, I need to get dressed. I use the same simple routine every time: Underpants, trousers, shirt, pullover, socks, shoes, jacket. Most likely you also have a routine, even though yours might be different. Some of these steps depend on each other. Make is useful for handling dependencies. Let’s try to express my routine as a Makefile. dress: trousers shoes jacket @echo &amp;quot;All done. Let&amp;#39;s go outside!&amp;quot; jacket: pullover @echo &amp;quot;Putting on jacket.&amp;quot; pullover: shirt @echo &amp;quot;Putting on pullover.&amp;quot; shirt: @echo &amp;quot;Putting on shirt.&amp;quot; trousers: underpants @echo &amp;quot;Putting on trousers.&amp;quot; underpants: @echo &amp;quot;Putting on underpants.&amp;quot; shoes: socks @echo &amp;quot;Putting on shoes.&amp;quot; socks: pullover @echo &amp;quot;Putting on socks.&amp;quot; If we execute the Makefile, we get the following output: $ make dress Putting on underpants. Putting on trousers. Putting on shirt. Putting on pullover. Putting on socks. Putting on shoes. Putting on jacket. All done. Let&amp;#39;s go outside! What just happened? 
Noticed how the steps are in the correct order? By plainly writing down the dependencies between the steps, make helps us to execute them correctly. Each build step has the following structure: target: [dependencies] &amp;lt;shell command to execute&amp;gt; &amp;lt;shell command to execute&amp;gt; ... The first target in a Makefile will be executed by default when we call make. The order of the targets does not matter. Shell commands must be indented with a tab. Add an @ sign to suppress output of the command that is executed. If target isn’t a file you want to build, please add .PHONY &amp;lt;target&amp;gt; at the end of the build step. Common phony targets are: clean, install, run,… Otherwise, if somebody creates an install directory, make will silently fail, because the build target already exists. .PHONY: install install: npm install Congratulations! You’ve learned 90% of what you need to know about make. Next steps Real Makefiles can do much more! They will only build the files that have changed instead of doing a full rebuild. And they will do as much as possible in parallel. Just try to keep them simple please. " }, { "title": "Of Boxes and Trees - Smart Pointers in Rust", "url": "https://endler.dev/2017/boxes-and-trees/", - "body": "Recently, I tried to implement a binary tree data structure in Rust.\nEach binary tree has a root value, a left, and a right subtree.\nI started from this Python implementation, which is quite straightforward.\n\n\nclass Tree:\n def __init__(self, val, left=None, right=None):\n self.val = val\n self.left = left\n self.right = right\n\nThis allows us to declare a fancy tree object like this:\n\nt = Tree(15,\n Tree(12,\n None,\n Tree(13)),\n Tree(22,\n Tree(18),\n Tree(100)))\n\nAnd the result can be visualized beautifully.\n(Yes I've drawn that myself.)\n\n \n \n A binary search tree representing our data structure\n \n\nPorting that code to Rust turned out to be a little... 
challenging.\nMy first attempt looked quite innocuous.\n\nstruct Tree {\n root: i64,\n left: Tree,\n right: Tree,\n}\n\nThat's pretty much a one-to-one translation of the Python definition — but rustc says no.\n\nerror[E0072]: recursive type `Tree` has infinite size\n --> src/main.rs:1:1\n |\n1 | struct Tree {\n | ^^^^^^^^^^^ recursive type has infinite size\n |\n = help: insert indirection (e.g., a `Box`, `Rc`, or `&`) at some point to make `Tree` representable\n\nComing from memory-managed languages (like Python, PHP, or Ruby), I was confused by this.\nThe problem is easy to understand, though.\nComputers have a limited amount of memory.\nIt's the compiler's job to find out how much memory to allocate for each item.\nIn our case, it infers the following:\nA tree is a structure containing an i64, and two trees. Each of these trees is a structure containing an i64, and two trees. Each of these...\nYou get the idea.\n\nTree { i64, Tree, Tree }\nTree { i64, Tree { ... }, Tree { ... } }\n// The next expansion won't fit on the page anymore\n\nSince we don't know how many subtrees our tree will have, there is no way to tell how much memory we need to allocate up front. We'll only know at runtime!\nRust tells us how to fix that: by inserting an indirection like Box, Rc, or &.\nThese are different "pointer types" in Rust. They all point to places in memory. So, instead of knowing the total size of our tree structure, we just know the point in memory where the tree is located. But that's enough to define the tree structure.\nThese pointer types allow us to do that safely and without manual memory management.\nThey all offer different guarantees and you should choose the one that fits your requirements best.\n\n\n& is called a borrow in Rust speech. It's the most common of the three. It's a reference to some place in memory, but it does not own the data it points to. 
As such, the lifetime of the borrow depends on its owner.\nTherefore we would need to add lifetime parameters here. This can make it tedious to use.\n\nstruct Tree<'a> {\n root: i64,\n left: &'a Tree<'a>,\n right: &'a Tree<'a>,\n}\n\n\nBox is a smart pointer with zero runtime overhead. It owns the data it points to.\nWe call it smart because when it goes out of scope, it will first drop the data it points to and then itself. No manual memory management required.\n\nstruct Tree {\n root: i64,\n left: Box<Tree>,\n right: Box<Tree>,\n}\n\n\nRc is another smart pointer. It's short for "reference-counting". It keeps track of the number of references to a data structure. As soon as the number of references is down to zero, it cleans up after itself.\nChoose Rc if you need to have multiple owners of the same data in one thread.\nFor multithreading, there's also Arc (atomic reference count).\n\nstruct Tree {\n root: i64,\n left: Rc<Tree>,\n right: Rc<Tree>,\n}\n\n\nPutting the tree into a box\nAll three options are totally valid. 
Which one you should choose, depends on your use-case.\nA rule of thumb is to keep it simple.\nIn my case, I chose to use a Box, because I did not need any special guarantees.\nMaking subtrees optional\nThe next problem I faced was that I could not instantiate a tree structure.\nThe left and right subtree have the type Box<Tree>, but at some\npoint I would need an empty subtree.\nIn the Python example, I used None to signal the end of my data structure.\nThanks to Rust's Option type we can do the same:\n\nstruct Tree {\n root: i64,\n left: Option<Box<Tree>>,\n right: Option<Box<Tree>>,\n}\n\nAfter all of this, we can create our first tree:\n\nTree {\n root: 15,\n left: Some(Box::new(Tree {\n root: 12,\n left: None,\n right: Some(Box::new(Tree {\n root: 13,\n left: None,\n right: None,\n })),\n })),\n right: Some(Box::new(Tree {\n root: 22,\n left: Some(Box::new(Tree {\n root: 18,\n left: None,\n right: None,\n })),\n right: Some(Box::new(Tree {\n root: 100,\n left: None,\n right: None,\n })),\n })),\n};\n\nDepending on your point of view, you might say this is either verbose or explicit.\nCompared to the Python version, it looked a bit too cluttered.\nCan we do better?\nChris McDonald helped me to come up with the following representation:\n\nTree::new(15)\n .left(\n Tree::new(12)\n .right(Tree::new(13))\n )\n .right(\n Tree::new(22)\n .left(Tree::new(18))\n .right(Tree::new(100))\n );\n\nTo me, this is much easier on the eye.\nHere's the full tree implementation that makes this possible:\n\n#[derive(Default)]\nstruct Tree {\n root: i64,\n left: Option<Box<Tree>>,\n right: Option<Box<Tree>>,\n}\n\nimpl Tree {\n fn new(root: i64) -> Tree {\n Tree {\n root: root,\n ..Default::default()\n }\n }\n\n fn left(mut self, leaf: Tree) -> Self {\n self.left = Some(Box::new(leaf));\n self\n }\n\n fn right(mut self, leaf: Tree) -> Self {\n self.right = Some(Box::new(leaf));\n self\n }\n}\n\nUpdate: Danny Grein mentioned on Twitter, that\nwe can support the following syntax by 
implementing From<i64> for Tree:\n\nroot(15)\n .left(root(12).right(13))\n .right(root(22).left(18).right(100));\nWhy did it work in Python?\nNow you might be wondering, why our tree implementation worked so flawlessly in Python.\nThe reason is that Python dynamically allocates memory for the tree object at runtime.\nAlso, it wraps everything inside a PyObject, which is kind of similar to Rc from above\n— a reference counted smart pointer.\nRust is more explicit here. It gives us more flexibility to express our needs.\nThen again, we need to know about all the possible alternatives to make good use of them.\nIf you can, then stay away from smart pointers and stick to simple borrows.\nIf that's not possible, as seen above, choose the least invasive one for your\nuse-case. The Rust documentation is a good starting point here.\nAlso, read "Idiomatic tree and graph-like structures in Rust" for some clever use of allocators.\n" + "body": "Recently, I tried to implement a binary tree data structure in Rust. Each binary tree has a root value, a left, and a right subtree. I started from this Python implementation, which is quite straightforward. class Tree: def __init__(self, val, left=None, right=None): self.val = val self.left = left self.right = right This allows us to declare a fancy tree object like this: t = Tree(15, Tree(12, None, Tree(13)), Tree(22, Tree(18), Tree(100))) And the result can be visualized beautifully. (Yes I’ve drawn that myself.) A binary search tree representing our data structure Porting that code to Rust turned out to be a little… challenging. My first attempt looked quite innocuous. struct Tree root: i64, left: Tree, right: Tree, That’s pretty much a one-to-one translation of the Python definition — but rustc says no. 
error[E0072]: recursive type `Tree` has infinite size --&amp;gt; src&#x2F;main.rs:1:1 | 1 | struct Tree | ^^^^^^^^^^^ recursive type has infinite size | = help: insert indirection (e.g., a `Box`, `Rc`, or `&amp;amp;`) at some point to make `Tree` representable Coming from memory-managed languages (like Python, PHP, or Ruby), I was confused by this. The problem is easy to understand, though. Computers have a limited amount of memory. It’s the compiler’s job to find out how much memory to allocate for each item. In our case, it infers the following: A tree is a structure containing an i64, and two trees. Each of these trees is a structure containing an i64, and two trees. Each of these… You get the idea. Tree i64, Tree, Tree Tree i64, Tree ... , Tree ... &#x2F;&#x2F; The next expansion won&amp;#39;t fit on the page anymore Since we don’t know how many subtrees our tree will have, there is no way to tell how much memory we need to allocate up front. We’ll only know at runtime! Rust tells us how to fix that: by inserting an indirection like Box, Rc, or &amp;amp;. These are different “pointer types” in Rust. They all point to places in memory. So, instead of knowing the total size of our tree structure, we just know the point in memory where the tree is located. But that’s enough to define the tree structure. These pointer types allow us to do that safely and without manual memory management. They all offer different guarantees and you should choose the one that fits your requirements best. &amp;amp; is called a borrow in Rust speech. It’s the most common of the three. It’s a reference to some place in memory, but it does not own the data it points to. As such, the lifetime of the borrow depends on its owner. Therefore we would need to add lifetime parameters here. This can make it tedious to use. 
struct Tree&amp;lt;&amp;#39;a&amp;gt; root: i64, left: &amp;amp;&amp;#39;a Tree&amp;lt;&amp;#39;a&amp;gt;, right: &amp;amp;&amp;#39;a Tree&amp;lt;&amp;#39;a&amp;gt;, Box is a smart pointer with zero runtime overhead. It owns the data it points to and stores it on the heap. We call it smart because when it goes out of scope, it will first drop the data it points to and then itself. No manual memory management required, which is neat. ✨ struct Tree root: i64, left: Box&amp;lt;Tree&amp;gt;, right: Box&amp;lt;Tree&amp;gt;, Rc is another smart pointer. It’s short for “reference-counting”. It keeps track of the number of references to the data structure internally. As soon as the number of references is down to zero, it cleans up after itself. Choose Rc if you need to have multiple owners of the same data in one thread. For multithreading, there’s also Arc (atomic reference count). struct Tree root: i64, left: Rc&amp;lt;Tree&amp;gt;, right: Rc&amp;lt;Tree&amp;gt;, Putting the tree into a box All three options are totally valid. Which one you should choose, depends on your use-case. A rule of thumb is to keep it simple. In my case, I chose to use a Box, because I did not need any special guarantees. Making subtrees optional The next problem I faced was that I could not instantiate a tree structure. The left and right subtree have the type Box&amp;lt;Tree&amp;gt;, but at some point I would need an empty subtree. In the Python example, I used None to signal the end of my data structure. 
Thanks to Rust’s Option type we can do the same: struct Tree root: i64, left: Option&amp;lt;Box&amp;lt;Tree&amp;gt;&amp;gt;, right: Option&amp;lt;Box&amp;lt;Tree&amp;gt;&amp;gt;, After all of this, we can create our first tree: Tree root: 15, left: Some(Box::new(Tree root: 12, left: None, right: Some(Box::new(Tree root: 13, left: None, right: None, )), )), right: Some(Box::new(Tree root: 22, left: Some(Box::new(Tree root: 18, left: None, right: None, )), right: Some(Box::new(Tree root: 100, left: None, right: None, )), )), ; Depending on your point of view, you might say this is either verbose or explicit. Compared to the Python version, it looked a bit too cluttered for my taste. Can we do better? Chris McDonald helped me come up with the following representation: Tree::new(15) .left( Tree::new(12) .right(Tree::new(13)) ) .right( Tree::new(22) .left(Tree::new(18)) .right(Tree::new(100)) ); To me, this is much easier on the eye. Here’s the full tree implementation that makes this possible: #[derive(Default)] struct Tree root: i64, left: Option&amp;lt;Box&amp;lt;Tree&amp;gt;&amp;gt;, right: Option&amp;lt;Box&amp;lt;Tree&amp;gt;&amp;gt;, impl Tree fn new(root: i64) -&amp;gt; Tree Tree root: root, ..Default::default() fn left(mut self, leaf: Tree) -&amp;gt; Self self.left = Some(Box::new(leaf)); self fn right(mut self, leaf: Tree) -&amp;gt; Self self.right = Some(Box::new(leaf)); self Update: Danny Grein mentioned on Twitter, that we can support the following syntax by implementing From&amp;lt;i64&amp;gt; for Tree: root(15) .left( root(12) .right(13) ) .right( root(22) .left(18) .right(100) ); Why did it just work in Python? Now you might be wondering why our tree implementation worked so flawlessly in Python. The reason is that Python dynamically allocates memory for the tree object at runtime. Also, it wraps everything inside a PyObject, which is kind of similar to Rc from above — a reference-counted smart pointer. Rust is more explicit here. 
It gives us more flexibility to express our needs but we also need to know about all the possible alternatives to make good use of them. My advice is to stay away from smart pointers if a simple borrow will do. If lifetimes get in the way however or you need additional guarantees like thread-safety, smart pointers are a great addition to your toolkit. The Rust documentation is a good starting point to learn more about smart pointers. Also, read “Idiomatic tree and graph-like structures in Rust” for some clever use of allocators in case your tree needs to be mutable at runtime. " }, { "title": "Why Type Systems Matter", "url": "https://endler.dev/2017/why-type-systems-matter/", - "body": "I've written most of my code in dynamically typed languages such as Python or PHP. But ever since dabbling with Rust, I've developed a passion for static type systems.\nIt began to feel very natural to me; like a totally new way to express myself.\n\nTypes are here to help\nWith types, you communicate your guarantees and expectations. Both, to the machine and other developers. Types express intent.\nAs a programmer, you've probably gained some intuition about types.\n\nsentence = "hello world"\n\nYou might guess that sentence is a string. It's in quotes, after all. \nIt gets a little more tricky if the type gets inferred from some other location.\n\nsentence = x\n\nIs sentence still a string? Uhm... we don't know. It depends on the type of x. Maybe x is a number, and so sentence is also a number? Maybe x used to be a string but during refactoring it is now a byte array? Fun times had by all. 
🎉\nWhat about this one?\n\nfilesize = "5000" # Size in bytes\n\nHere, we express a file size as a string.\nWhile this might work, it's an unsettling idea.\nEven simple calculations might lead to unexpected results:\n\nfile1 = "5000"\nfile2 = "3000"\ntotal = file1 + file2\nprint(total) # prints '50003000'\n\nHow can we fix that?\nWe can safely assume that a file size is always a number.\nTo be more precise, it must be a positive, natural number.\nThere can be no negative file size, and our smallest block of memory is one byte\n(on all but the most obscure systems).\nAnd since we're dealing with a discrete machine here, we know it can only be\na filesize the computer can handle.\nIf we only could express all of this in a precise way...?\nThis is where type systems enter the stage.\nIn Rust, you could define a File type with a field named size.\n\nstruct File {\n name: String,\n size: usize,\n}\n\nThe usize gives you the guarantee to be always big enough to hold any pointer into memory (on 64 bit computers usize = u64).\nNow there is no more ambiguity about the type of size.\nYou can't even create an invalid file object:\n\n// Error: `size` can't be a string.\nlet weird_file = File { name: 123, size: "hello" };\n\nThe type system will prevent invalid state. It will simply not allow you to\nbreak your own rules. It will hold you accountable for your design choices.\nDare I say it: it becomes an extension of your brain.\nAfter some time you start to rely on the type checker. "If it compiles, it runs"\nis a powerful mantra.\nTypes improve readability and provide context\nConsider the following Python snippet:\n\ndef filter_files(files):\n matches = []\n for file in files:\n if file.status == 0:\n matches.append(file)\n return matches\n\nWhat does 0 represent?\nWe can't say. 
We lack the context!\nThe story gets a little clearer once we define an enum type like this:\n\nfrom enum import Enum\n\nclass FileStatus(Enum):\n OPEN = 0\n CLOSED = 1\n\nOur example from above becomes\n\ndef filter_files(files):\n matches = []\n for file in files:\n if file.status == FileStatus.OPEN:\n matches.append(file)\n return matches\n\nIn a larger codebase, FileStatus.OPEN is much easier to search for than 0.\nNote: The native enum type was introduced very late in the history of Python. It serves as a nice\nexample of how enhancing the type system can help improve readability.\nWhen you combine different types, magic happens.\nAll pieces suddenly fall into place when you choose your types wisely. Out of nowhere, the compiler will start\nchecking your design decisions and if all your types work well together. It will point out flaws in your mental model.\nThis gives you a great amount of confidence during refactoring.\nFor example, let's think about sorting things.\nWhen I think of sorting, I first think about a list of numbers:\n\nsorted([1,5,4,3,2]) # [1,2,3,4,5]\n\nThat's the happy path. How about this one?\n\nsorted(1)\n\nOuch. This can't work because 1 is a single number and not a collection!\nIf we forget to check the type before we pass it to sorted, we get an error\nwhile the program runs.\n\nsorted([1, "fish"])\n\nIn Python 2, this would result in [1, 'fish'] (because strings will be compared by length)\nEdit: Reddit user jcdyer3 pointed out that the reason is that when incomparable types are compared, they sort by their type, so all ints will come before all strings. It's a CPython implementation detail).\n\n \n \n 1 < fish according to Python 2\n \n Illustration provided by Freepik\n \n\nSince Python 3, this throws an Exception.\n\nTypeError: '<' not supported between instances of 'str' and 'int'\n\nMuch better! One less source of error. 
The problematic thing is though, that this happens at runtime.\nThat's because of Python's dynamic typing.\nWe could have avoided that with a statically typed language.\n\nfn sorted<T>(collection: &mut [T]) where T: PartialOrd {\n // TODO: Sort the collection here.\n}\n\nLooks scary but it really isn't.\nWe define a function named sorted which takes one input parameter named\ncollection.\nThe type of collection consists of four parts:\n\nThe & means that we "borrow" the collection, we don't own it. After the function returns, it will still exist. It won't be cleaned up.\nThe mut means that the collection is mutable. We are allowed to modify it.\n[T] indicates that we expect a list/slice/vector as input. Everything else\nwill be rejected at compile time (before the program even runs).\nPartialOrd is\nthe magic sauce. It is a trait, which is something like an interface. It means that all elements T in the collection must be partially ordered.\n\nAll of this information helps the compiler to prevent us from shooting ourselves in the foot.\nAnd we can understand the inputs and outputs of the function without looking elsewhere.\nTakeaways\n\nTypes force developers to do their homework and think about the guarantees and limitations of their code.\nDon't think of types as constraints, think of them as a safety net which will protect you from your own flawed mental models.\nAlways choose the type which most precisely expresses your intentions.\nIf there is no perfect type in the standard library, create your own from simpler types.\n\nFollowing these rules, I found that I was magically guided towards the most elegant representation of my ideas.\nMy code became much more idiomatic.\n" + "body": "I’ve written most of my code in dynamically typed languages such as Python or PHP; but ever since dabbling with Rust, I’ve developed a passion for static type systems. It began to feel very natural to me; like a totally new way to express myself. 
Types are here to help With types, you communicate your guarantees and expectations. Both, to the machine and other developers. Types express intent. As a programmer, you’ve probably gained some intuition about types. sentence = &amp;quot;hello world&amp;quot; You might guess that sentence is a string. It’s in quotes, after all. It gets a little more tricky if the type gets inferred from some other location. sentence = x Is sentence still a string? Uhm… we don’t know. It depends on the type of x. Maybe x is a number, and so sentence is also a number? Maybe xused to be a string but during refactoring it is now a byte array? Fun times had by all. 🎉 What about this one? filesize = &amp;quot;5000&amp;quot; # Size in bytes Here, we express a file size as a string. While this might work, it’s an unsettling idea. Even simple calculations might lead to unexpected results: file1 = &amp;quot;5000&amp;quot; file2 = &amp;quot;3000&amp;quot; total = file1 + file2 print(total) # prints &amp;#39;50003000&amp;#39; How can we fix that? We can safely assume that a file size is always a number. To be more precise, it must be a positive, natural number. There can be no negative file size, and our smallest block of memory is one byte (on all but the most obscure systems). And since we’re dealing with a discrete machine here, we know it can only be a filesize the computer can handle. If we only could express all of this in a precise way…? This is where type systems enter the stage. In Rust, you could define a File type with a field named size. struct File name: String, size: usize, The usize gives you the guarantee to be always big enough to hold any pointer into memory (on 64 bit computers usize = u64). Now there is no more ambiguity about the type of size. You can’t even create an invalid file object: &#x2F;&#x2F; Error: `size` can&amp;#39;t be a string. let weird_file = File name: 123, size: &amp;quot;hello&amp;quot; ; The type system will prevent invalid state. 
It will simply not allow you to break your own rules. It will hold you accountable for your design choices. Dare I say it: it becomes an extension of your brain. After some time you start to rely on the type checker. “If it compiles, it runs” is a powerful mantra. Types improve readability and provide context Consider the following Python snippet: def filter_files(files): matches = [] for file in files: if file.status == 0: matches.append(file) return matches What does 0 represent? We can’t say. We lack the context! The story gets a little clearer once we define an enum type like this: from enum import Enum class FileStatus(Enum): OPEN = 0 CLOSED = 1 Our example from above becomes def filter_files(files): matches = [] for file in files: if file.status == FileStatus.OPEN: matches.append(file) return matches In a larger codebase, FileStatus.OPEN is much easier to search for than 0. Note: The native enum type was introduced very late in the history of Python. It serves as a nice example of how enhancing the type system can help improve readability. When you combine different types, magic happens. All pieces suddenly fall into place when you choose your types wisely. Out of nowhere, the compiler will start checking your design decisions and if all your types work well together. It will point out flaws in your mental model. This gives you a great amount of confidence during refactoring. For example, let’s think about sorting things. When I think of sorting, I first think about a list of numbers: sorted([1,5,4,3,2]) # [1,2,3,4,5] That’s the happy path. How about this one? sorted(1) Ouch. This can’t work because 1 is a single number and not a collection! If we forget to check the type before we pass it to sorted, we get an error while the program runs. 
sorted([1, &amp;quot;fish&amp;quot;]) In Python 2, this would result in [1, fish] (because strings will be compared by length) Edit: Reddit user jcdyer3 pointed out that the reason is that when incomparable types are compared, they sort by their type, so all ints will come before all strings. It’s a CPython implementation detail). 1 &amp;lt; fish according to Python 2 Source: Illustration provided by Freepik Since Python 3, this throws an Exception. TypeError: &amp;#39;&amp;lt;&amp;#39; not supported between instances of &amp;#39;str&amp;#39; and &amp;#39;int&amp;#39; Much better! One less source of error. The problematic thing is though, that this happens at runtime. That’s because of Python’s dynamic typing. We could have avoided that with a statically typed language. fn sorted&amp;lt;T&amp;gt;(collection: &amp;amp;mut [T]) where T: PartialOrd &#x2F;&#x2F; TODO: Sort the collection here. Looks scary but it really isn’t. We define a function named sorted which takes one input parameter named collection. The type of collection consists of four parts: The &amp;amp; means that we “borrow” the collection, we don’t own it. After the function returns, it will still exist. It won’t be cleaned up. The mut means that the collection is mutable. We are allowed to modify it. [T] indicates that we expect a list&#x2F;slice&#x2F;vector as input. Everything else will be rejected at compile time (before the program even runs). PartialOrd is the magic sauce. It is a trait, which is something like an interface. It means that all elements T in the collection must be partially ordered. All of this information helps the compiler to prevent us from shooting ourselves in the foot. And we can understand the inputs and outputs of the function without looking elsewhere. Takeaways Types force developers to do their homework and think about the guarantees and limitations of their code. 
Don’t think of types as constraints, think of them as a safety net which will protect you from your own flawed mental models. Always choose the type which most precisely expresses your intent. If there is no perfect type in the standard library, create your own from simpler types. Following these rules, I found that I was magically guided towards the most elegant representation of my ideas. My code became much more idiomatic. " }, { "title": "Being a Professional Programmer", "url": "https://endler.dev/2017/professional-programming/", - "body": "When I was around 12, I set myself the goal to become a professional programmer.\nI can tell, because at this time I made the conscious decision to use my right hand to control the mouse — even though I'm left-handed.\nMy reasoning was, that if I ever had to help out a colleague with a computer problem I sure did not want to move her mouse to the other side before getting started. That would be awkward. \n(Of course I did not foresee the advent of the wireless mouse... As a matter of fact, I still use the right hand out of habit.)\nOne thing I always wanted to know is how a typical workday of a programmer looked like.\nWas I wasting my time by pursuing this career?\nOnly later I found the answer — but I had to become a professional programmer myself.\nThis article aims to save you from a few years of uncertainty.\nBefore you dig into this, be sure to read the first part of this series titled "Why I love Programming".\nWhat's the difference between "professional" and "hobby" programming?\nIn one word: accountability.\nYou are expected to be responsible.\nProgramming in your free time is like throwing a party without having to clean up: pure fun!\nIf you get bored you're free to move on.\nNot so in professional programming, where you're expected to get the job done.\nEvery application requires constant bug fixing, refactoring and sometimes even monkey patching. 
Maintaining code is no amusement park; especially if it's not your own.\nBeing a Junior Developer\nFresh out of school you might think you're a pretty kick-ass programmer. Let me tell you: you're not.\nYou wouldn't guess what talented people can do with these blinking machines.\nYou'll have tons of things to learn in the first few years.\nProfessional software development is a lengthy process. Writing readable, well-tested, well-documented code is a substantial effort. You will need patience, lots of it. Both, with yourself and with others.\nAs a junior, you only think in black and white. You look at some code, and it's all wrong. Who in their right mind created this horrible monstrosity?!\nAs you become more experienced, you'll see the shades of grey.\nEventually, you'll understand that those neckbeards were not slower than you, but\nmore careful. You learn how to test your code, how to document it. You even begin to\nappreciate UML diagrams.\nBecoming obsolete\n"The world is moving too fast. What you learned today is obsolete tomorrow. Why bother?".\nI've heard that saying countless times throughout my career.\nIt's both, popular and wrong.\nIf a skill becomes obsolete, it's not a skill.\nThroughout your career you don't want to be known as "the Jenkins guy", you want to be the\nexpert in Software Quality. Hint: If you don't know what Jenkins is, that's the\nwhole point. You should not narrow down your scope too much.\nThe right skills never become obsolete.\nFrom time to time it happens, that due to some new company policy your beautiful creation will become obsolete.\nAs depressing as it sounds: it's a regular part of the software business.\nYou need to adapt.\nOne advice I can give you is not to take it too seriously.\nDrop the project, keep the wisdom.\nEmbrace change.\nWriting software in a non-perfect world\nA professional programmer has to deal with deficiencies all the time. The game is called "balancing constraints". 
Deadlines, budgets, and code quality are just a few competing constraints we have to consider.\nElegant designs fade away in the face of reality.\nIn the end you want to earn money with your software, so you have to ship it!\nThe best developers I know, keep the balance between pragmatism and elegance.\nThey know which parts matter and which don't. Those who don't will be replaced\nwhen there's a need.\nFor me, I was always leaning more towards elegance.\nThat's just a nicer way to say I was a perfectionist.\nI needed to learn the pragmatic part through hard work.\nMentoring less experienced Programmers\n\nThe better you become at programming, the less you code.\n\nInstead, you will spend more time thinking about Software Architecture,\nhigh-level designs and splitting up the work into smaller junks for other developers to consume.\nYou will start mentoring Junior Developers. Recruiting will require a lot of your\nattention. You will spend your time in Meetings, discussing project goals with\nbusiness people.\nOne might say, you take the role of a mediator. Others might call you a manager.\nOnce you know the ins and outs of the business, you are an essential asset for\nthe company. You might get asked to become a manager, or at least managing projects will slowly feel like a natural extension of your responsibilities.\nBut beware! This slow and gradual process is dangerous.\nMoving back to being a full-time programmer is not easy. \nDuring the time you were busy with project management, others were busy improving their\ncoding skills.\nYou can try to keep up-to-date in your free time but that's hard.\nI've seen excellent developers become great managers. At some point in your career\nit's a decision you need to make for yourself.\nHowever you decide, it pays off to invest some time into learning how to\ncommunicate. 
Empathy plays a prominent role in that.\nDeveloping software as a team is so complicated that a lot of time is spent on aligning goals and communicating problems. In fact, communication is what you get paid for. This includes documentation, tests and the code itself.\nTalk to others, listen to their problems. Read books about Software Project\nManagement, even though you don't want to be a manager yourself. It will help\nyou understand the role of your boss.\nA word about money\nThere are many good reasons to work in IT, but money is not one of them.\nWhile it can be tempting to base your career decisions on prospective salary,\ndon't do it. You will be very unhappy. You will spend eight hours or more each day sitting in front of a blinking cursor.\nThat's a lot of time, and time is much more valuable than money.\nDon't get me wrong. There's plenty of jobs that pay well. \nYou will most likely not get rich, though. If you want\nto make it big, I can't help you. Maybe look into Real Estate or so...\nThe only way to get rich as a developer is to work on something really hard, put in lots of hours and get\nlucky. Startups, basically. Keep in mind: One Bill Gates takes a thousand failed\nattempts.\nAnother way is to stop being a programmer and become a manager instead. \nI've already shared my opinion on that in the last section.\nFinal words\nWhile you should learn to read (and maybe write) code, working as a professional programmer is not for everyone.\nYou might ask: "Is it worth it?". \nFor me it was the right decision. Hopefully I could help you to make your own.\n" + "body": "When I was around 12, I set myself the goal to become a professional programmer. I can tell, because at this time I made the conscious decision to use my right hand to control the mouse — even though I’m left-handed. My reasoning was, that if I ever had to help out a colleague with a computer problem I sure did not want to move her mouse to the other side before getting started. 
That would be awkward. (Of course I did not foresee the advent of the wireless mouse… As a matter of fact, I still use the right hand out of habit.) One thing I always wanted to know is how a typical workday of a programmer looked like. Was I wasting my time by pursuing this career? Only later I found the answer — but I had to become a professional programmer myself. This article aims to save you from a few years of uncertainty. Before you dig into this, be sure to read the first part of this series titled “Why I love Programming”. What’s the difference between “professional” and “hobby” programming? In one word: accountability. You are expected to be responsible. Programming in your free time is like throwing a party without having to clean up: pure fun! If you get bored you’re free to move on. Not so in professional programming, where you’re expected to get the job done. Every application requires constant bug fixing, refactoring and sometimes even monkey patching. Maintaining code is no amusement park; especially if it’s not your own. Being a Junior Developer Fresh out of school you might think you’re a pretty kick-ass programmer. Let me tell you: you’re not. You wouldn’t guess what talented people can do with these blinking machines. You’ll have tons of things to learn in the first few years. Professional software development is a lengthy process. Writing readable, well-tested, well-documented code is a substantial effort. You will need patience, lots of it. Both, with yourself and with others. As a junior, you only think in black and white. You look at some code, and it’s all wrong. Who in their right mind created this horrible monstrosity?! As you become more experienced, you’ll see the shades of grey. Eventually, you’ll understand that those neckbeards were not slower than you, but more careful. You learn how to test your code, how to document it. You even begin to appreciate UML diagrams. Becoming obsolete “The world is moving too fast. 
What you learned today is obsolete tomorrow. Why bother?”. I’ve heard that saying countless times throughout my career. It’s both, popular and wrong. If a skill becomes obsolete, it’s not a skill. Throughout your career you don’t want to be known as “the Jenkins guy”, you want to be the expert in Software Quality. Hint: If you don’t know what Jenkins is, that’s the whole point. You should not narrow down your scope too much. The right skills never become obsolete. From time to time it happens, that due to some new company policy your beautiful creation will become obsolete. As depressing as it sounds: it’s a regular part of the software business. You need to adapt. One advice I can give you is not to take it too seriously. Drop the project, keep the wisdom. Embrace change. Writing software in a non-perfect world A professional programmer has to deal with deficiencies all the time. The game is called “balancing constraints”. Deadlines, budgets, and code quality are just a few competing constraints we have to consider. Elegant designs fade away in the face of reality. In the end you want to earn money with your software, so you have to ship it! The best developers I know, keep the balance between pragmatism and elegance. They know which parts matter and which don’t. Those who don’t will be replaced when there’s a need. For me, I was always leaning more towards elegance. That’s just a nicer way to say I was a perfectionist. I needed to learn the pragmatic part through hard work. Mentoring less experienced Programmers The better you become at programming, the less you code. Instead, you will spend more time thinking about Software Architecture, high-level designs and splitting up the work into smaller junks for other developers to consume. You will start mentoring Junior Developers. Recruiting will require a lot of your attention. You will spend your time in Meetings, discussing project goals with business people. One might say, you take the role of a mediator. 
Others might call you a manager. Once you know the ins and outs of the business, you are an essential asset for the company. You might get asked to become a manager, or at least managing projects will slowly feel like a natural extension of your responsibilities. But beware! This slow and gradual process is dangerous. Moving back to being a full-time programmer is not easy. During the time you were busy with project management, others were busy improving their coding skills. You can try to keep up-to-date in your free time but that’s hard. I’ve seen excellent developers become great managers. At some point in your career it’s a decision you need to make for yourself. However you decide, it pays off to invest some time into learning how to communicate. Empathy plays a prominent role in that. Developing software as a team is so complicated that a lot of time is spent on aligning goals and communicating problems. In fact, communication is what you get paid for. This includes documentation, tests and the code itself. Talk to others, listen to their problems. Read books about Software Project Management, even though you don’t want to be a manager yourself. It will help you understand the role of your boss. A word about money There are many good reasons to work in IT, but money is not one of them. While it can be tempting to base your career decisions on prospective salary, don’t do it. You will be very unhappy. You will spend eight hours or more each day sitting in front of a blinking cursor. That’s a lot of time, and time is much more valuable than money. Don’t get me wrong. There’s plenty of jobs that pay well. You will most likely not get rich, though. If you want to make it big, I can’t help you. Maybe look into Real Estate or so… The only way to get rich as a developer is to work on something really hard, put in lots of hours and get lucky. Startups, basically. Keep in mind: One Bill Gates takes a thousand failed attempts. 
Another way is to stop being a programmer and become a manager instead. I’ve already shared my opinion on that in the last section. Final words While you should learn to read (and maybe write) code, working as a professional programmer is not for everyone. You might ask: “Is it worth it?”. For me it was the right decision. Hopefully I could help you to make your own. " }, { "title": "The Future of Rust", "url": "https://endler.dev/2017/future-of-rust/", - "body": "Let me first point out the obvious: yes, the title is a little sensationalist. Also\nyou might be asking why I should be entitled to talk about the future of Rust. After\nall, I'm neither part of the Rust core team, nor a major contributor to the Rust\necosystem. To that I answer: why not? It's fun to think about the future of\nsystems programming in general and Rust in particular.\n\n \n \n Ferris is the inofficial Rust mascot\n \n Illustration provided by zooenvato for FreePik.com\n \n\nYou might have heard of the near-term goals that the core team has committed itself to. Faster compile times and a more gentle learning curve come to mind.\nThis post is not about that.\nInstead, I want to explore some more exotic areas where Rust could shine in\nfive to ten years from now. To make it big, we need both, roots and wings.\nData Science\nRight now, the most popular languages for Data Science are Python, Java, R, and C++.\n\n \n Programming language popularity for data science (Source).\n \n\nWe've observed that while prototypes are mostly written in dynamically typed\nlanguages like Python and R, once an algorithm reaches production level quality\nit is often rewritten in faster languages such as C++ for scalability.\nIt is not unthinkable that Rust is going to be some healthy competition for C++ in the near future.\nThe benchmarks of leaf, a machine learning library written in Rust, are already nothing short of\nimpressive.\nBlockbuster games\nGames are another area where Rust might shine. 
\nIt's financially attractive for Game Studios to support multiple platforms without much\neffort. Cargo and rustup make cross-compiling easy.\nModern libraries slowly fill the tooling gaps for large-scale game development.\nRust's support for the Vulkan 3D graphics API might already be the best of class.\nThe killer feature though is the unique combination of safety and performance.\nIf you ship a game to a million players and they throw money at you, you'll better make sure that it doesn't crash... right?\nThat said, the first AAA Rust game might still be far in the future. Here's Blizzard's standpoint on Rust in 2017.\nSystems Engineering\nMaybe — eventually — we will also see formal verification of the Rust core. Projects like RustBelt would then open new opportunities in safety-focused industries like the Space industry. Wouldn't it be nice to safely land a Spacecraft on Mars that is controlled by Rust? (Or by one of its spiritual successors.)\nI wonder if SpaceX is experimenting with Rust already...\nIntegrating with other languages\nThere are many other areas I haven't even mentioned yet. For example, financial and medical software or Scientific Computing, just to name a few.\nIn all cases, Rust might be a good fit. Right now the biggest barrier to entry \nis probably the huge amount of legacy code. Many industries maintain large codebases in Cobol,\nC or Fortran that are not easily rewritten.\nFortunately, Rust has been proven to work very nicely with other languages. 
\nPartly because of strong C-compatibility and partly because there is no Runtime or Garbage Collector.\nA typical pattern is to optimize some core part of an application in Rust that has hard safety/performance\nrequirements, while leaving the rest untouched.\nI think this symbiosis will only become stronger in the long run.\nThere are even ambitious projects like Corrode which attempt to translate C code to Rust automatically.\nSummary\nOverall I see huge potential for Rust in areas where safety, performance or total control over the machine are essential. With languages like Rust and Crystal, a whole class of errors is a thing of the past. No null pointers, no segmentation faults, no memory leaks, no data races.\nI find it encouraging that future generations of programmers will take all that for granted.\n" + "body": "Let me first point out the obvious: yes, the title is a little sensationalist. Also you might be asking why I should be entitled to talk about the future of Rust. After all, I’m neither part of the Rust core team, nor a major contributor to the Rust ecosystem. To that I answer: why not? It’s fun to think about the future of systems programming in general and Rust in particular. Ferris is the inofficial Rust mascot Source: Illustration provided by FreePik.com You might have heard of the near-term goals that the core team has committed itself to. Faster compile times and a more gentle learning curve come to mind. This post is not about that. Instead, I want to explore some more exotic areas where Rust could shine in five to ten years from now. To make it big, we need both, roots and wings. Data Science Right now, the most popular languages for Data Science are Python, Java, R, and C++. Programming language popularity for data science (Source). 
We’ve observed that while prototypes are mostly written in dynamically typed languages like Python and R, once an algorithm reaches production level quality it is often rewritten in faster languages such as C++ for scalability. It is not unthinkable that Rust is going to be some healthy competition for C++ in the near future. The benchmarks of leaf, a machine learning library written in Rust, are already nothing short of impressive. Blockbuster games Games are another area where Rust might shine. It’s financially attractive for Game Studios to support multiple platforms without much effort. Cargo and rustup make cross-compiling easy. Modern libraries slowly fill the tooling gaps for large-scale game development. Rust’s support for the Vulkan 3D graphics API might already be the best of class. The killer feature though is the unique combination of safety and performance. If you ship a game to a million players and they throw money at you, you’ll better make sure that it doesn’t crash… right? That said, the first AAA Rust game might still be far in the future. Here’s Blizzard’s standpoint on Rust in 2017. Systems Engineering Maybe — eventually — we will also see formal verification of the Rust core. Projects like RustBelt would then open new opportunities in safety-focused industries like the Space industry. Wouldn’t it be nice to safely land a Spacecraft on Mars that is controlled by Rust? (Or by one of its spiritual successors.) I wonder if SpaceX is experimenting with Rust already… Integrating with other languages There are many other areas I haven’t even mentioned yet. For example, financial and medical software or Scientific Computing, just to name a few. In all cases, Rust might be a good fit. Right now the biggest barrier to entry is probably the huge amount of legacy code. Many industries maintain large codebases in Cobol, C or Fortran that are not easily rewritten. Fortunately, Rust has been proven to work very nicely with other languages. 
Partly because of strong C-compatibility and partly because there is no Runtime or Garbage Collector. A typical pattern is to optimize some core part of an application in Rust that has hard safety&#x2F;performance requirements, while leaving the rest untouched. I think this symbiosis will only become stronger in the long run. There are even ambitious projects like Corrode which attempt to translate C code to Rust automatically. Summary Overall I see huge potential for Rust in areas where safety, performance or total control over the machine are essential. With languages like Rust and Crystal, a whole class of errors is a thing of the past. No null pointers, no segmentation faults, no memory leaks, no data races. I find it encouraging that future generations of programmers will take all that for granted. " }, { "title": "Launching a URL Shortener in Rust using Rocket", "url": "https://endler.dev/2017/rust-url-shortener/", - "body": "One common Systems Design task in interviews is to sketch the software architecture of a URL shortener (a bit.ly clone, so to say).\nSince I was playing around with Rocket, why not give it a try?\n\n\n \n \n A rocket travelling through space\n \n\nRequirements\nA URL shortener has two main responsibilities:\n\nCreate a shorter URL from a longer one (d'oh)\nRedirect to the longer link when the short link is requested.\n\nLet's call our service rust.ly (Hint, hint: the domain is still available at the time of writing...).\nFirst, we create a new Rust project:\n\ncargo new --bin rustly\n\nNext, we add Rocket to our Cargo.toml:\n\n[dependencies]\nrocket = "0.2.4"\nrocket_codegen = "0.2.4"\n\nWarning: Most likely you need to get the very newest Rocket version.\nOtherwise, you might get some entertaining error messages. Check out the newest\nversion from crates.io.\nSince Rocket requires cutting-edge Rust features, we need to use a recent nightly\nbuild. 
Rustup provides a simple way to switch between stable and nightly.\n\nrustup update && rustup override set nightly\nA first prototype\nNow we can start coding our little service.\nLet's first write a simple "hello world" skeleton to get started.\nPut this into src/main.rs:\n\n#![feature(plugin)]\n#![plugin(rocket_codegen)]\n\nextern crate rocket;\n\n#[get("/<id>")]\nfn lookup(id: &str) -> String {\n format!("⏩ You requested {}. Wonderful!", id)\n}\n\n#[get("/<url>")]\nfn shorten(url: &str) -> String {\n format!("💾 You shortened {}. Magnificient!", url)\n}\n\nfn main() {\n rocket::ignite().mount("/", routes![lookup])\n .mount("/shorten", routes![shorten])\n .launch();\n}\n\nUnder the hood, Rocket is doing some magic to enable this nice syntax.\nMore specifically, we use the rocket_codegen crate for that.\n(It's implemented as a compiler plugin, which is also the reason why we need to use nightly Rust.)\nIn order to bring the rocket library into scope, we write extern crate rocket;.\nWe defined the two routes for our service. Both routes will respond to a GET request.\nThis is done by adding an attribute named get to a function.\nThe attribute can take additional arguments.\nIn our case, we define an id variable for the lookup endpoint and a url variable for the shorten endpoint. \nBoth variables are Unicode string slices. Since Rust has awesome Unicode support, we respond with a nice emoji just to show off. 🕶\nLastly, we need a main function which launches Rocket and mounts our two routes. 
This way, they become publicly available.\nIf you want to know even more about the in-depth details, I may refer you to the official Rocket documentation.\nLet's check if we're on the right track by running the application.\n\ncargo run\n\nAfter some compiling, you should get some lovely startup output from Rocket:\n\n🔧 Configured for development.\n => address: localhost\n => port: 8000\n => log: normal\n => workers: 8\n🛰 Mounting '/':\n => GET /<hash>\n🛰 Mounting '/shorten':\n => GET /shorten/<url>\n🚀 Rocket has launched from http://localhost:8000...\n\nSweet! Let's call our service.\n\n> curl localhost:8000/shorten/www.matthias-endler.de\n💾 You shortened www.matthias-endler.de. Magnificient!\n\n> curl localhost:8000/www.matthias-endler.de\n⏩ You requested www.matthias-endler.de. Wonderful!\n\nSo far so good.\nData storage and lookup\nWe need to keep the shortened URLs over many requests... but how?\nIn a production scenario, we could use some NoSQL data store like Redis for that.\nSince the goal is to play with Rocket and learn some Rust, we will simply use an\nin-memory store.\nRocket has a feature called managed state.\nIn our case, we want to manage a repository of URLs.\nFirst, let's create a file named src/repository.rs:\n\nuse std::collections::HashMap;\nuse shortener::Shortener;\n\npub struct Repository {\n urls: HashMap<String, String>,\n shortener: Shortener,\n}\n\nimpl Repository {\n pub fn new() -> Repository {\n Repository {\n urls: HashMap::new(),\n shortener: Shortener::new(),\n }\n }\n\n pub fn store(&mut self, url: &str) -> String {\n let id = self.shortener.next_id();\n self.urls.insert(id.to_string(), url.to_string());\n id\n }\n\n pub fn lookup(&self, id: &str) -> Option<&String> {\n self.urls.get(id)\n }\n}\n\nWithin this module we first import the HashMap implementation from the standard library.\nWe also include shortener::Shortener;, which will help us to shorten the URLs in the next step. 
Don't worry too much about that for now.\nBy convention, we implement a new() method to create a new Repository struct with an empty HashMap and a new Shortener. Additionally, we have two methods, store and lookup. \nstore takes a URL and writes it to our in-memory HashMap storage. It uses our yet to be defined shortener to create a unique id. It returns the shortened ID for the entry.\nlookup gets a given ID from the storage and returns it as an Option. If the ID is found, the return value will be Some(url), if there is no match it will return None. \nNote that we convert the string slices (&str) to String using the to_string() method. This way we don't need to deal with lifetimes. As a beginner, don't think too hard about them.\nAdvanced remarks (can safely be skipped)\nA seasoned (Rust) Programmer might do a few things differently here. Did you notice the tight coupling between the repository and the shortener? In a production system, Repository and Shortener might simply be concrete implementations of traits (which are a bit like interfaces in other languages, but more powerful). For example, Repository would implement Cache trait:\n\ntrait Cache {\n // Store an entry and return an ID\n fn store(&mut self, data: &str) -> String;\n // Look up a previously stored entry\n fn lookup(&self, id: &str) -> Option<&String>;\n}\n\nThis way we get a clear interface, and we can easily switch to a different implementation (e.g. a RedisCache). Also, we could have a MockRepository to simplify testing. Same for Shortener.\nYou might also want to use the Into trait to support both, &str and String as a parameter of store:\n\npub fn store<T: Into<String>>(&mut self, url: T) -> String {\n\t\tlet id = self.shortener.shorten(url);\n\t\tself.urls.insert(id.to_owned(), url.into());\n\t\tid\n}\n\nIf you're curious about this, read this article from Herman J. 
Radtke III.\nFor now, let's keep it simple.\nActually shortening URLs\nLet's implement the URL shortener itself.\nYou might be surprised how much was written about URL shortening all over the web.\nOne common way is to create short urls using base 62 conversion.\nAfter looking around some more, I found this sweet crate called harsh, which perfectly fits the bill.\nTo use harsh, we add it to the dependency section of our Cargo.toml:\n\nharsh = "0.1.2"\n\nNext, we add the crate to the top of to our main.rs:\n\nextern crate harsh;\n\nLet's create a new file named src/shortener.rs and write the following:\n\nuse harsh::{Harsh, HarshBuilder};\n\npub struct Shortener {\n id: u64,\n generator: Harsh,\n}\n\nimpl Shortener {\n pub fn new() -> Shortener {\n let harsh = HarshBuilder::new().init().unwrap();\n Shortener {\n id: 0,\n generator: harsh,\n }\n }\n\n pub fn next_id(&mut self) -> String {\n let hashed = self.generator.encode(&[self.id]).unwrap();\n self.id += 1;\n hashed\n }\n}\n\nWith use harsh::{Harsh, HarshBuilder}; we bring the required structs into scope. Then we define our own Shortener struct, which wraps Harsh. It has two fields: id stores the next id for shortening. (Since there are no negative ids, we use an unsigned integer for that.) The other field is the generator itself, for which we use Harsh. \nUsing the HarshBuilder you can do a lot of fancy stuff, like setting a custom alphabet for the ids. For more info, check out the official docs.\nWith next_id we retrieve a new String id for our URLs.\nAs you can see, we don't pass the URL to next_id. That means we actually don't shorten anything. We merely create a short, unique ID. 
That's because most hashing algorithms produce fairly long URLs and having short URLs is the whole idea.\nWiring it up\nSo we are done with our shortener and the repository.\nWe need to adjust our src/main.rs again to use the two.\nThis is the point where it gets a little hairy.\nI have to admit that I struggled a bit here.\nMainly because I was not used to multi-threaded request handling. In Python or\nPHP you don't need to think about shared-mutable access.\nInitially I had the following code in my main.rs:\n\n#[get("/<url>")]\nfn store(repo: State<Repository>, url: &str) {\n repo.store(url);\n}\n\nfn main() {\n rocket::ignite().manage(Repository::new())\n .mount("/store", routes![store])\n .launch();\n}\n\nState is the built-in way to save data across requests in Rocket. Just tell it what belongs to your application state with manage() and Rocket will automatically inject it into the routes.\nBut the compiler did not like that:\n\nerror: cannot borrow immutable borrowed content as mutable\n --> src/main.rs\n |\n | repo.store(url);\n | ^^^^ cannot borrow as mutable\n\nWhat would happen if two requests wanted to modify our repository at the same time?\nRust prevented a race condition here!\nAdmittedly the error message could have been a bit more user-friendly, though.\nFortunately, Sergio Benitez helped me out on the Rocket IRC channel (thanks again!).\nThe solution was to put the repository behind a Mutex.\nHere is the full src/main.rs in its entirety:\n\n#![feature(plugin, custom_derive)]\n#![plugin(rocket_codegen)]\n\nextern crate rocket;\nextern crate harsh;\n\nuse std::sync::RwLock;\nuse rocket::State;\nuse rocket::request::Form;\nuse rocket::response::Redirect;\n\nmod repository;\nmod shortener;\nuse repository::Repository;\n\n#[derive(FromForm)]\nstruct Url {\n url: String,\n}\n\n#[get("/<id>")]\nfn lookup(repo: State<RwLock<Repository>>, id: &str) -> Result<Redirect, &'static str> {\n match repo.read().unwrap().lookup(id) {\n Some(url) => 
Ok(Redirect::permanent(url)),\n _ => Err("Requested ID was not found.")\n }\n}\n\n#[post("/", data = "<url_form>")]\nfn shorten(repo: State<RwLock<Repository>>, url_form: Form<Url>) -> Result<String, String> {\n let ref url = url_form.get().url;\n let mut repo = repo.write().unwrap();\n let id = repo.store(&url);\n Ok(id.to_string())\n}\n\nfn main() {\n rocket::ignite().manage(RwLock::new(Repository::new()))\n .mount("/", routes![lookup, shorten])\n .launch();\n}\n\nAs you can see we're using a std::sync::RwLock here, to protect our repository from shared mutable access. This type of lock allows any number of readers or at most one writer at the same time.\nIt makes our code a bit harder to read because whenever we want to access our repository, we need to call the read and write methods first.\nIn our lookup method, you can see that we are returning a Result type now. It has two cases: if we find an id in our repository, we return Ok(Redirect::permanent(url)), which will take care of the redirect. If we can't find the id, we return an Error.\nIn our shorten method, we switched from a get to a post request.\nThe advantage is, that we don't need to deal with URL encoding. We just create a struct Url and derive FromForm for it, which will handle the deserialization for us. Fancy!\nWe're done. Let's fire up the service again and try it out!\n\ncargo run\n\nIn a new window, we can now store our first URL:\n\ncurl --data "url=https://www.matthias-endler.de" http://localhost:8000/\n\nWe get some ID back that we can use to retrieve the URL again. 
In my case, this was gY.\nPoint your browser to http://localhost:8000/gY.\nYou should be redirected to my homepage.\nSummary\nRocket provides fantastic documentation and a great community.\nIt really feels like an idiomatic Rustlang web framework.\nI hope you had some fun while playing with Rocket.\nYou can find the full example code on Github.\n" + "body": "One common systems design task in interviews is to sketch the software architecture of a URL shortener (a bit.ly clone, if you may). Since I was playing around with Rocket – a web framework for Rust – why not give it a try? A rocket travelling through space Requirements A URL shortener has two main responsibilities: Create a short URL for a longer one (d’oh!). Redirect to the longer link when the short link is requested. Let’s call our service rust.ly (Hint, hint: the domain is still available at the time of writing…). First, let’s create a new Rust project: cargo new --bin rustly Next, we add Rocket to our Cargo.toml: [dependencies] rocket = &amp;quot;0.2.4&amp;quot; rocket_codegen = &amp;quot;0.2.4&amp;quot; Warning: Most likely you need to get the very newest Rocket version. Otherwise, you might get some… entertaining error messages. Find the newest version on crates.io. Since Rocket requires cutting-edge Rust features, we need to use a recent nightly build. Rustup provides a simple way to switch between stable and nightly. 🤔 Nightly Rust might no longer be required. Has anyone tried without and can report back? rustup update &amp;amp;&amp;amp; rustup override set nightly A first prototype Now we can start coding our little service. First, let’s write a simple “hello world” skeleton to get started. Put this into src&#x2F;main.rs: #![feature(plugin)] #![plugin(rocket_codegen)] extern crate rocket; #[get(&amp;quot;&#x2F;&amp;lt;id&amp;gt;&amp;quot;)] fn lookup(id: &amp;amp;str) -&amp;gt; String format!(&amp;quot;⏩ You requested . 
Wonderful!&amp;quot;, id) #[get(&amp;quot;&#x2F;&amp;lt;url&amp;gt;&amp;quot;)] fn shorten(url: &amp;amp;str) -&amp;gt; String format!(&amp;quot;💾 You shortened . Magnificent!&amp;quot;, url) fn main() rocket::ignite().mount(&amp;quot;&#x2F;&amp;quot;, routes![lookup]) .mount(&amp;quot;&#x2F;shorten&amp;quot;, routes![shorten]) .launch(); Under the hood, Rocket is doing some magic to enable this nice syntax. More specifically, we use the rocket_codegen crate for that. In order to bring the rocket library into scope, we write extern crate rocket;. We defined the two routes for our service. Both routes will respond to a GET request. This is done by adding an attribute named get to a function. The attribute can take additional arguments. In our case, we define an id variable for the lookup endpoint and a url variable for the shorten endpoint. Both variables are Unicode string slices. Since Rust has awesome Unicode support, we respond with a nice emoji just to show off. 🕶 Lastly, we need a main function, which launches Rocket and mounts our two routes. This way, they become publicly available. If you want to know even more about the in-depth details, I may refer you to the official Rocket documentation. Let’s check if we’re on the right track by running the application. cargo run After some compiling, you should get some lovely startup output from Rocket: 🔧 Configured for development. =&amp;gt; address: localhost =&amp;gt; port: 8000 =&amp;gt; log: normal =&amp;gt; workers: 8 🛰 Mounting &amp;#39;&#x2F;&amp;#39;: =&amp;gt; GET &#x2F;&amp;lt;hash&amp;gt; 🛰 Mounting &amp;#39;&#x2F;shorten&amp;#39;: =&amp;gt; GET &#x2F;shorten&#x2F;&amp;lt;url&amp;gt; 🚀 Rocket has launched from https:&#x2F;&#x2F;localhost:8000... Sweet! Let’s call our service. &amp;gt; curl localhost:8000&#x2F;shorten&#x2F;www.endler.dev 💾 You shortened www.endler.dev. Magnificent! &amp;gt; curl localhost:8000&#x2F;www.endler.dev ⏩ You requested www.endler.dev. Wonderful! So far so good. 
Data storage and lookup We need to keep the shortened URLs over many requests… but how? In a production scenario, we could use some NoSQL data store like Redis for that. Since the goal is to play with Rocket and learn some Rust, we will simply use an in-memory store. Rocket has a that feature called managed state. In our case, we want to manage a repository of URLs. First, let’s create a file named src&#x2F;repository.rs: use std::collections::HashMap; use shortener::Shortener; pub struct Repository urls: HashMap&amp;lt;String, String&amp;gt;, shortener: Shortener, impl Repository pub fn new() -&amp;gt; Repository Repository urls: HashMap::new(), shortener: Shortener::new(), pub fn store(&amp;amp;mut self, url: &amp;amp;str) -&amp;gt; String let id = self.shortener.next_id(); self.urls.insert(id.to_string(), url.to_string()); id pub fn lookup(&amp;amp;self, id: &amp;amp;str) -&amp;gt; Option&amp;lt;&amp;amp;String&amp;gt; self.urls.get(id) Within this module we first import the HashMap implementation from the standard library. We also include shortener::Shortener;, which helps us shorten the URLs in the next step. Don’t worry too much about that for now. By convention, we implement a new() method to create a Repository struct with an empty HashMap and a new Shortener. Additionally, we have two methods, store and lookup. store takes a URL and writes it to our in-memory HashMap storage. It uses our yet-to-be-defined shortener to create a unique id. It returns the shortened ID for the entry. lookup gets a given ID from the storage, and returns it as an Option. If the ID is found, the return value will be Some(url); if there is no match it will return None. Note that we convert the string slices (&amp;amp;str) to String using the to_string() method. This way we don’t need to deal with lifetimes. As a beginner, don’t think too hard about them. Additional remarks (can safely be skipped) A seasoned (Rust) developer™ might do a few things differently here. 
Did you notice the tight coupling between the repository and the shortener? In a production system, Repository and Shortener might simply be concrete implementations of traits (which are a bit like interfaces in other languages, but more powerful). For example, Repository could implement a Cache trait: trait Cache &#x2F;&#x2F; Store an entry and return an ID fn store(&amp;amp;mut self, data: &amp;amp;str) -&amp;gt; String; &#x2F;&#x2F; Look up a previously stored entry fn lookup(&amp;amp;self, id: &amp;amp;str) -&amp;gt; Option&amp;lt;&amp;amp;String&amp;gt;; This way we get clear sepration of concerns, and we can easily switch to a different implementation (e.g. a RedisCache). Also, we could have a MockRepository to simplify testing. Same for Shortener. On top of that, you might want to use the Into trait to support both, &amp;amp;str and String as parameters of store: pub fn store&amp;lt;T: Into&amp;lt;String&amp;gt;&amp;gt;(&amp;amp;mut self, url: T) -&amp;gt; String let id = self.shortener.shorten(url); self.urls.insert(id.to_owned(), url.into()); id If you’re curious about this, read this article from Herman J. Radtke III. For now, let’s keep it simple. Actually shortening URLs Let’s implement the URL shortener itself. You might be surprised how much was written about URL shortening all over the web. One common way is to create short URLs using base 62 conversion. After looking around some more, I found this sweet little crate called harsh, which perfectly fits the bill. It creates a hash id from an input string. 
To use harsh, we add it to the dependency section of our Cargo.toml: harsh = &amp;quot;0.1.2&amp;quot; Next, we add the crate to the top of to our main.rs: extern crate harsh; Let’s create a new file named src&#x2F;shortener.rs and write the following: use harsh:: Harsh, HarshBuilder ; pub struct Shortener id: u64, generator: Harsh, impl Shortener pub fn new() -&amp;gt; Shortener let harsh = HarshBuilder::new().init().unwrap(); Shortener id: 0, generator: harsh, pub fn next_id(&amp;amp;mut self) -&amp;gt; String let hashed = self.generator.encode(&amp;amp;[self.id]).unwrap(); self.id += 1; hashed With use harsh:: Harsh, HarshBuilder ; we bring the required structs into scope. Then we define our own Shortener struct, which wraps Harsh. It has two fields: id stores the next id for shortening. (Since there won’t be any negative ids, we use an unsigned integer for that.) The other field is the generator itself, for which we use Harsh. Using the HarshBuilder you can do a lot of fancy stuff, like setting a custom alphabet for the ids. We’re good for now, but for more info, check out the official docs. With next_id we retrieve a new String id for our URLs. As you can see, we don’t pass the URL to next_id. That means we actually don’t shorten anything. We merely create a short, unique ID. That’s because most hashing algorithms produce fairly long URLs and having short URLs is kind of the whole idea. Wiring it up So we are done with our shortener and the repository. We need to adjust our src&#x2F;main.rs again to make use of the two. This is the point where it gets a little hairy. I have to admit that I struggled a bit here. Mainly because I was not used to multi-threaded request handling. In Python or PHP you don’t need to think about shared-mutable access. 
Initially I had the following code in my main.rs: #[get(&amp;quot;&#x2F;&amp;lt;url&amp;gt;&amp;quot;)] fn store(repo: State&amp;lt;Repository&amp;gt;, url: &amp;amp;str) repo.store(url); fn main() rocket::ignite().manage(Repository::new()) .mount(&amp;quot;&#x2F;store&amp;quot;, routes![store]) .launch(); State is the built-in way to save data across requests in Rocket. Just tell it what belongs to your application state with manage() and Rocket will automatically inject it into the routes. But the compiler said no: error: cannot borrow immutable borrowed content as mutable --&amp;gt; src&#x2F;main.rs | | repo.store(url); | ^^^^ cannot borrow as mutable In hindsight it all makes sense: What would happen if two requests wanted to modify our repository at the same time? Rust prevented a race condition here! Yikes. Admittedly, the error message could have been a bit more user-friendly, though. Fortunately, Sergio Benitez (the creator of Rocket) helped me out on the Rocket IRC channel (thanks again!). The solution was to put the repository behind a Mutex. 
Here is our src&#x2F;main.rs in its full glory: #![feature(plugin, custom_derive)] #![plugin(rocket_codegen)] extern crate rocket; extern crate harsh; use std::sync::RwLock; use rocket::State; use rocket::request::Form; use rocket::response::Redirect; mod repository; mod shortener; use repository::Repository; #[derive(FromForm)] struct Url url: String, #[get(&amp;quot;&#x2F;&amp;lt;id&amp;gt;&amp;quot;)] fn lookup(repo: State&amp;lt;RwLock&amp;lt;Repository&amp;gt;&amp;gt;, id: &amp;amp;str) -&amp;gt; Result&amp;lt;Redirect, &amp;amp;&amp;#39;static str&amp;gt; match repo.read().unwrap().lookup(id) Some(url) =&amp;gt; Ok(Redirect::permanent(url)), _ =&amp;gt; Err(&amp;quot;Requested ID was not found.&amp;quot;) #[post(&amp;quot;&#x2F;&amp;quot;, data = &amp;quot;&amp;lt;url_form&amp;gt;&amp;quot;)] fn shorten(repo: State&amp;lt;RwLock&amp;lt;Repository&amp;gt;&amp;gt;, url_form: Form&amp;lt;Url&amp;gt;) -&amp;gt; Result&amp;lt;String, String&amp;gt; let ref url = url_form.get().url; let mut repo = repo.write().unwrap(); let id = repo.store(&amp;amp;url); Ok(id.to_string()) fn main() rocket::ignite().manage(RwLock::new(Repository::new())) .mount(&amp;quot;&#x2F;&amp;quot;, routes![lookup, shorten]) .launch(); As you can see we’re using a std::sync::RwLock here, to protect our repository from shared mutable access. This type of lock allows any number of readers or at most one writer at the same time. It makes our code a bit harder to read because whenever we want to access our repository, we need to call the read and write methods first. In our lookup method, you can see that we are returning a Result type now. It has two cases: if we find an id in our repository, we return Ok(Redirect::permanent(url)), which will take care of the redirect. If we can’t find the id, we return an Error. In our shorten method, we switched from a get to a post request. The advantage is, that we don’t need to deal with URL encoding. 
We just create a struct Url and derive FromForm for it, which will handle the deserialization for us. Fancy! We’re done. Let’s fire up the service again and try it out! cargo run In a new window, we can now store our first URL: curl --data &amp;quot;url=https:&#x2F;&#x2F;www.endler.dev&amp;quot; https:&#x2F;&#x2F;localhost:8000&#x2F; We get some ID back that we can use to retrieve the URL again. In my case, this was gY. Point your browser to https:&#x2F;&#x2F;localhost:8000&#x2F;gY and you should be redirected to my homepage. Summary Rocket provides fantastic documentation and a great community. It really feels like an idiomatic Rustlang web framework. I hope you had some fun while playing with Rocket. You can find the full example code on Github. " }, { "title": "The Essence of Information", "url": "https://endler.dev/2017/the-essence-of-information/", - "body": "People look confused when I tell them about my passion for algorithms and data-structures.\nMost of them understand what a Programmer is doing, but not what Computer Science is good for.\nAnd even if they do, they think it has no practical relevance.\nLet me show you with a simple example, that applied Computer Science can be found everywhere.\nImagine a pile of socks that need to get sorted.\nNot exactly the most exciting pastime.\nYou've put off this task for so long, that it will inevitably take an hour to be done.\n\n \n \n Yes, there is a game about sorting socks.\n \n It's called Sort the Socks and you can get it for free on the App Store.\n \n\nConsidering your options, you decide to get some help.\nTogether with a friend you get to work. 
You finish in roughly half the time.\nA Computer Scientist might call this pile of socks a resource.\nYou and your friend get bluntly degraded to workers.\nBoth of you can work on the problem at the same time — or in parallel.\nThis is the gist of Parallel Computing.\nNow, some properties make sock-sorting a good fit for doing in parallel.\n\nThe work can be nicely split up. It takes about the same time for every worker to find a pair of socks.\nFinding a different pair is a completely separate task that can happen at the same time.\n\nThe more workers you assign to this task, the faster you're done.\n\n1 worker takes 60 minutes.\n2 workers take 30 minutes.\n\nHow long will 3 workers take? Right! Around 20 minutes. We could write down\na simple formula for this relationship:\n \nWell, that is not quite correct. We forgot to consider the overhead: When Mary\ntries to pick up a sock, Stephen might reach for the same.\nThey both smile and one of them picks another sock.\nIn computing, a worker might do the same. Well, not smiling but picking another\ntask. When lots of workers share resources, these situations occur quite\nfrequently. And resolving the situation always takes a little extra time. So we are a\nbit away from our optimal sorting speed because of that.\nBut it gets worse! Let's say you have 100 workers for 100 socks.\nIn the beginning, every worker might take one sock and try to find a match for\nit. Here's the problem: As soon as they pick up one sock each, there are no\nsocks left. All workers are in a waiting state. The sorting takes forever.\nThat's a deadlock, and it's one of the most frightening scenarios of parallel computing.\nIn this case, a simple solution is to put down the sock again and wait for some time until trying to get a new sock.\nAnother way out of the dilemma would be, to enforce some kind of "protocol" for sorting. 
\nThink of a protocol as a silent agreement between the workers on how to achieve a common goal.\nSo, in our case, each worker might only be responsible for one color of socks.\nWorker one takes the green socks, worker two the gray ones and so on.\nWith this simple trick, we can avoid a deadlock, because we work on completely\nseparate tasks.\nBut there's still a catch. What if there are only four green socks and 4000 gray socks?\nWorker one would get bored fairly quickly. He would sort the two pairs of socks in\nno time and then watch worker two sort the rest.\nThat's not really team spirit, is it?\nSplitting up the work like this makes most sense, if we can assume that we\nhave around the same number of socks for every color.\nThis way we achieve roughly the same workload for\neveryone.\nThe following histogram gives you an idea of what I mean:\n\nIn this case, we have about equally sized piles for each color. Looks\nlike a fair workload for every worker to me.\n\nIn the second case, we don't have an equal distribution. I don't want to sort the\ngray socks in this example. We need to think a little harder here.\nWhat can we do?\nMost of the time it helps to think of other ways to split up work.\nFor example, we could have two workers sort the big gray pile together. One\nsorts the large socks; the other one sorts the small ones. We run into another problem, though: Who decides what "large" and "small" means in this case?\nSo, instead of thinking too hard about a smarter approach, we decide to be\npragmatic here. Everyone just grabs an equally sized pile of socks — no\nmatter the color or the size — and gets\nto work.\nMost likely, there will be some remaining socks in each pile, which have no match.\nThat's fine. We just throw them all together, mix the socks, create new piles from\nthat, and sort them again. We do so until we're done.\nWe call that a task queue. 
It has two advantages: First, you don't need any additional agreements between the workers and second, it scales reasonably\nwell with the number of workers without thinking too hard about the problem\ndomain.\nThe tricky part about distributed systems is, that seemingly straightforward solutions can fail\nmiserably in practice.\nWhat if our small piles look like this?\n \nThe number of pairs in each pile is... sobering.\nWhat we could do is run a very quick presorting step to increase the number of matches. Or maybe you come up with an even better idea?\nThe cool thing is, once you have found a faster approach, it works for similar tasks, too.\nProblems like this have their roots in Computer Science, and they can be found everywhere.\nPersonally, I don't like the term Computer Science too much. I prefer\nthe German term "Informatik", which I would roughly translate as "Information Science".\nBecause the real essence of what we're doing here is to find a general way to solve a\nwhole class of problems. We think of the nature of objects and their properties.\nWe don't sort socks; we try to answer the fundamental questions of information. Maybe now you can understand why I'm so passionate about this subject.\nOh, and here's a related post about why I love programming.\n" + "body": "People look confused when I tell them about my passion for algorithms and data-structures. Most of them understand what a Programmer is doing, but not what Computer Science is good for. And even if they do, they think it has no practical relevance. Let me show you with a simple example, that applied Computer Science can be found everywhere. Imagine a pile of socks that need to get sorted. Not exactly the most exciting pastime. You’ve put off this task for so long, that it will inevitably take an hour to be done. Yes, there is a game about sorting socks. Source: It’s called Sort the Socks and you can get it for free on the App Store. Considering your options, you decide to get some help. 
Together with a friend you get to work. You finish in roughly half the time. A Computer Scientist might call this pile of socks a resource. You and your friend get bluntly degraded to workers. Both of you can work on the problem at the same time — or in parallel. This is the gist of Parallel Computing. Now, some properties make sock-sorting a good fit for doing in parallel. The work can be nicely split up. It takes about the same time for every worker to find a pair of socks. Finding a different pair is a completely separate task that can happen at the same time. The more workers you assign to this task, the faster you’re done. 1 worker takes 60 minutes. 2 workers take 30 minutes. How long will 3 workers take? Right! Around 20 minutes. We could write down a simple formula for this relationship: The formula for the sorting time. Well, that is not quite correct. We forgot to consider the overhead: When Mary tries to pick up a sock, Stephen might reach for the same. They both smile and one of them picks another sock. In computing, a worker might do the same. Well, not smiling but picking another task. When lots of workers share resources, these situations occur quite frequently. And resolving the situation always takes a little extra time. So we are a bit away from our optimal sorting speed because of that. But it gets worse! Let’s say you have 100 workers for 100 socks. In the beginning, every worker might take one sock and try to find a match for it. Here’s the problem: As soon as they pick up one sock each, there are no socks left. All workers are in a waiting state. The sorting takes forever. That’s a deadlock, and it’s one of the most frightening scenarios of parallel computing. In this case, a simple solution is to put down the sock again and wait for some time until trying to get a new sock. Another way out of the dilemma would be, to enforce some kind of “protocol” for sorting. 
Think of a protocol as a silent agreement between the workers on how to achieve a common goal. So, in our case, each worker might only be responsible for one color of socks. Worker one takes the green socks, worker two the gray ones and so on. With this simple trick, we can avoid a deadlock, because we work on completely separate tasks. But there’s still a catch. What if there are only four green socks and 4000 gray socks? Worker one would get bored fairly quickly. He would sort the two pairs of socks in no time and then watch worker two sort the rest. That’s not really team spirit, is it? Splitting up the work like this makes most sense, if we can assume that we have around the same number of socks for every color. This way we achieve roughly the same workload for everyone. The following histogram gives you an idea of what I mean: Even piles of socks. In this case, we have about equally sized piles for each color. Looks like a fair workload for every worker to me. Uneven piles of socks. In the second case, we don’t have an equal distribution. I don’t want to sort the gray socks in this example. We need to think a little harder here. What can we do? Most of the time it helps to think of other ways to split up work. For example, we could have two workers sort the big gray pile together. One sorts the large socks; the other one sorts the small ones. We run into another problem, though: Who decides what “large” and “small” means in this case? So, instead of thinking too hard about a smarter approach, we decide to be pragmatic here. Everyone just grabs an equally sized pile of socks — no matter the color or the size — and gets to work. Most likely, there will be some remaining socks in each pile, which have no match. That’s fine. We just throw them all together, mix the socks, create new piles from that, and sort them again. We do so until we’re done. We call that a task queue. 
It has two advantages: First, you don’t need any additional agreements between the workers and second, it scales reasonably well with the number of workers without thinking too hard about the problem domain. The tricky part about distributed systems is, that seemingly straightforward solutions can fail miserably in practice. What if our small piles look like this? A random pile of socks. The number of pairs in each pile is… sobering. What we could do is run a very quick presorting step to increase the number of matches. Or maybe you come up with an even better idea? The cool thing is, once you have found a faster approach, it works for similar tasks, too. Problems like this have their roots in Computer Science, and they can be found everywhere. Personally, I don’t like the term Computer Science too much. I prefer the German term “Informatik”, which I would roughly translate as “Information Science”. Because the real essence of what we’re doing here is to find a general way to solve a whole class of problems. We think of the nature of objects and their properties. We don’t sort socks; we try to answer the fundamental questions of information. Maybe now you can understand why I’m so passionate about this subject. Oh, and here’s a related post about why I love programming. " }, { "title": "Why I Love Programming", "url": "https://endler.dev/2017/why-i-love-programming/", - "body": "Programming has many faces. It is the science of structured thinking.\nIt is the art of eloquent expression.\nIt teaches you to be humble when you look at other peoples' fascinating work. 
\nMost of all, it teaches you a lot about yourself.\nWhile the syntax may change, the concepts will not.\n\nThis post is split into two parts.\nIn the first part, I will talk about the joy of programming.\nThe second part will deal with the notion of being a professional programmer.\nIf you're not sure yet whether you want to learn how to program, this article is for you.\nAutomating stuff gives you superhero strengths\nBeing able to program is infinitely rewarding. You can help your sister sort a\nthousand pictures in a few seconds. You write a little backup\nscript for your grandma. The possibilities are endless.\nCoding is fun!\nCoding something is more fun than using it. It's even better than playing games.\nWhy? Learn how to program a computer and get the best games for free — your own.\nYou're in total control. It's your idea, your logic, even your laws of physics.\nIt's like building a house but without paying anything for the\nbuilding materials. You can build a mansion for free.\nSharing is fun, too!\nTo get new inspiration for your next project, read the programs of others.\nThis will give you an idea of how they think and how they solve problems.\nMany great programmers share their best code with you.\nYou can do the same and share your project - or just the prettiest parts of it - with other programmers.\nWatching somebody else use your work is one\nof the most satisfying things you will ever experience.\nIt's very fulfilling to see your tool serve a purpose it wasn't built for.\nElegant, creative solutions\nIt's very appealing to work so hard on your vision that everything unnecessary peels off.\nAll these little ideas and fundamental insights suddenly fall into place.\nWhat's remaining is the distilled truth, the result of an ambitious but rewarding thought process\nand when you write it down as a program you can see all the little pieces working together.\nThis makes it so gratifying to figure stuff out on your own.\nProgramming is about 
understanding a problem so thoroughly, that you can teach a\npiece of metal how to solve it.\nEven the way your program is structured can be a piece of art.\nIt can be concise, witty and fast all at the same time.\nTalk to a machine\nIt's fascinating that something is understood by machines and humans using the same language.\nI'm baffled when I realize that these circuits can actually "understand" and interpret words - in a way.\nStanding on the shoulders of giants\nTalking to other programmers and watching them work is a fascinating inspiration. \nThe very system you are using to read this text relies on their work.\nEven if you're far apart, you can study their work on Open Source projects online.\nBut if you get a chance, watch them giving talks at conferences and meet them at local user groups.\nBecoming part of a community is gratifying.\nTo exchange ideas and to collaborate on projects helps you push your boundaries and learn something new every day.\nHave fun, forget the rest\nThe machine is agnostic to your skin color. It doesn't matter if you're a twelve-year-old girl or a lecturer at University.\nIf you keep making the same mistake for ten hours straight, your computer won't scream at you. It won't punish you. It will happily await your commands. Also, the hurdles of entry are pretty low. An old computer is enough; even pen and paper and a book will suffice to work on cool programming ideas.\nGet started!\nYou choose your own projects; nobody else.\nDon't let anybody tell you that you're not smart enough for this stuff. Ever.\nEach program is a wonderful journey so join us and code the world around you.\n" + "body": "Programming has many faces. It is the science of structured thinking. It is the art of eloquent expression. It teaches you to be humble when you look at other peoples’ fascinating work. Most of all, it teaches you a lot about yourself. While the syntax may change, the concepts will not. This post is split into two parts. 
In the first part, I will talk about the joy of programming. The second part will deal with the notion of being a professional programmer. If you’re not sure yet whether you want to learn how to program, this article is for you. Automating stuff gives you superhero strengths Being able to program is infinitely rewarding. You can help your sister sort a thousand pictures in a few seconds. You write a little backup script for your grandma. The possibilities are endless. Coding is fun! Coding something is more fun than using it. It’s even better than playing games. Why? Learn how to program a computer and get the best games for free — your own. You’re in total control. It’s your idea, your logic, even your laws of physics. It’s like building a house but without paying anything for the building materials. You can build a mansion for free. Sharing is fun, too! To get new inspiration for your next project, read the programs of others. This will give you an idea of how they think and how they solve problems. Many great programmers share their best code with you. You can do the same and share your project - or just the prettiest parts of it - with other programmers. Watching somebody else use your work is one of the most satisfying things you will ever experience. It’s very fulfilling to see your tool serve a purpose it wasn’t built for. Elegant, creative solutions It’s very appealing to work so hard on your vision that everything unnecessary peels off. All these little ideas and fundamental insights suddenly fall into place. What’s remaining is the distilled truth, the result of an ambitious but rewarding thought process and when you write it down as a program you can see all the little pieces working together. This makes it so gratifying to figure stuff out on your own. Programming is about understanding a problem so thoroughly, that you can teach a piece of metal how to solve it. Even the way your program is structured can be a piece of art. 
It can be concise, witty and fast all at the same time. Talk to a machine It’s fascinating that something is understood by machines and humans using the same language. I’m baffled when I realize that these circuits can actually “understand” and interpret words - in a way. Standing on the shoulders of giants Talking to other programmers and watching them work is a fascinating inspiration. The very system you are using to read this text relies on their work. Even if you’re far apart, you can study their work on Open Source projects online. But if you get a chance, watch them giving talks at conferences and meet them at local user groups. Becoming part of a community is gratifying. To exchange ideas and to collaborate on projects helps you push your boundaries and learn something new every day. Have fun, forget the rest The machine is agnostic to your skin color. It doesn’t matter if you’re a twelve-year-old girl or a lecturer at University. If you keep making the same mistake for ten hours straight, your computer won’t scream at you. It won’t punish you. It will happily await your commands. Also, the hurdles of entry are pretty low. An old computer is enough; even pen and paper and a book will suffice to work on cool programming ideas. Get started! You choose your own projects; nobody else. Don’t let anybody tell you that you’re not smart enough for this stuff. Ever. Each program is a wonderful journey so join us and code the world around you. " }, { "title": "Tools", "url": "https://endler.dev/2011/tools/", - "body": "For as long as I can think, religious flamewars have infected computer science.\nHaving arguments about technical topics can be healthy, but flamewars are not. I'm sick of it.\nI'm fed up with people telling me that their work environment is oh-so better,\nfaster and so on. That's fine, but it doesn't matter. Your equipment only plays a supporting role. You don't even need\na computer to do programming. Donald Knuth wrote algorithms on a\nnotepad. 
Alan Turing wrote the first chess computer on a piece of\npaper. And it worked. Beat that!\nFor an average user, the next best system is probably good enough. Just a few bucks and you get an excellent piece of hardware which is completely sufficient to surf the web, chat, archive photos, write documents, listen to music and watch movies. You can do that with a Pentium IV, 256 MB RAM and any recent Operating System (you will likely get that one for free). Heck, you can use your old Commodore for most of that. Computers have been mature and reliable enough to do all that for ages. There's no need to upgrade your system for Farmville, just like there's no reason to buy a new car if the old one works perfectly fine. When it comes to software, many of us still use Office 2000 or Photoshop 8 or VisiCalc without feeling the urge to upgrade.\nProfessionals find themselves in a similar situation. Well, maybe we invest a bit more money, but still, our hardware is incredibly cheap compared to our salary (hopefully). Nothing is perfect, but most of the time it's good enough. That compiler you were using a decade ago? Still does the job. We are still using slightly modified descendants of programming languages from computing stone-age. Even if you're doing numerical computing for NASA, your primary work environment is a black box running a text editor or an IDE.\nI don't care what you are using to get things done. Find an environment that suits your needs and be happy with it. Maybe you use Emacs on a Lemote Yeelong netbook (hello Richard Stallman) or Vim on your workstation. It's the same thing: A text editor running on a piece of metal.\nYou're not a worse programmer for using Nano, ed or TextMate. Notepad works just fine, too. It loads files, saves files and lets you edit them in between. That's a hell lot more functionality than Bill Gates and Paul Allen had when they wrote a BASIC interpreter for the Altair. 
If you find something you're happy with, just stick with it but don't start arguing. It isn't worth your time.\nDon't feed the trolls. When it comes to software, don't fall into the old FreeBSD vs. Linux vs. Windows vs. mum cliche. Instead, talk about your code. Let's look at your problem-solving skills. Let's be pragmatic here.\n\nTalk is cheap. Show me the code. - Linus Torvalds\n\nI don't care which programming language you are using. Java? Fine. Visual Basic? Great! Scala, Cobol, PHP, C++? All fine. Write in Assembler or lolcode. Don't moan about the fact that language X is missing feature Y. Write a library or use something different. Stop saying JavaScript is a toy language. It just doesn't fit your needs. Instead, show me your Lisp adventure game. Write an interpreter for Brainfuck. Do something. Move things.\nConcerning PHP, nir wrote on Hackernews:\n\nAny idiot can write a snarky comment about PHP. Very few get to write code that has anywhere near the impact it had.\n\nWill you fall off your chair when I admit that I like the PHP syntax?\nOK, it has its rough edges (do we really need the $ sign?) but what's\nmore important is how much I can get done with it. PHP was my long time\ngo-to language for off the hook, one time scripts. It looks a bit ugly\nbut it runs on any server and comes with an enormous amount of built-in\nfunctionality. It's great for rapid prototyping and gluing things together.\nIn fact, when you write a piece of software, what you should strive for is to produce quite good software and what you really need to accomplish is good enough software to make your users happy.\nZed A. Shaw puts it quite nicely in the afterword to Learn Python the hard way\n\nI have been programming for a very long time. So long that it is incredibly boring to me. At the time that I wrote this book I knew about 20 programming languages and could learn new ones in about a day to a week depending on how weird they were. 
Eventually though this just became boring and couldn't hold my interest. What I discovered after this journey of learning was that the languages didn't matter, it was what you did with them. Actually, I always knew that, but I'd get distracted by the languages and forget it periodically. The programming language you learn and use does not matter. Do not get sucked into the religion surrounding programming languages as that will only blind you to their real purpose of being your tool for doing interesting things.\n\nDon't get emotional for any tool you use. An iPhone - I'm sorry to disappoint you - is just a phone. No magic. No "think different". "But it's evil!", the ether says, "it's not open source". Well, Android just exists because Google needed to rapidly develop a mobile platform. It's simply part of their business. There is no moral behind that. Google is a yet another company just like Microsoft or Apple.\nMy MacBook serves me as a solid tool, but if something "better" comes around, I will happily kick it out. I've ditched Firefox after five years just because Chrome is faster and I will get rid of Chrome when I find a worthy successor.\nVim is quite good in my opinion but if there's a faster way to do things I'm not afraid to dump it. Instead get your hands dirty and fix the problems or craft something new.\n" + "body": "For as long as I can think, religious flamewars have infected computer science. Having arguments about technical topics can be healthy, but flamewars are not. I’m sick of it. I’m fed up with people telling me that their work environment is oh-so better, faster and so on. That’s fine, but it doesn’t matter. Your equipment only plays a supporting role. You don’t even need a computer to do programming. Donald Knuth wrote algorithms on a notepad. Alan Turing wrote the first chess computer on a piece of paper. And it worked. Beat that! For an average user, the next best system is probably good enough. 
Just a few bucks and you get an excellent piece of hardware which is completely sufficient to surf the web, chat, archive photos, write documents, listen to music and watch movies. You can do that with a Pentium IV, 256 MB RAM and any recent Operating System (you will likely get that one for free). Heck, you can use your old Commodore for most of that. Computers have been mature and reliable enough to do all that for ages. There’s no need to upgrade your system for Farmville, just like there’s no reason to buy a new car if the old one works perfectly fine. When it comes to software, many of us still use Office 2000 or Photoshop 8 or VisiCalc without feeling the urge to upgrade. Professionals find themselves in a similar situation. Well, maybe we invest a bit more money, but still, our hardware is incredibly cheap compared to our salary (hopefully). Nothing is perfect, but most of the time it’s good enough. That compiler you were using a decade ago? Still does the job. We are still using slightly modified descendants of programming languages from computing stone-age. Even if you’re doing numerical computing for NASA, your primary work environment is a black box running a text editor or an IDE. I don’t care what you are using to get things done. Find an environment that suits your needs and be happy with it. Maybe you use Emacs on a Lemote Yeelong netbook (hello Richard Stallman) or Vim on your workstation. It’s the same thing: A text editor running on a piece of metal. You’re not a worse programmer for using Nano, ed or TextMate. Notepad works just fine, too. It loads files, saves files and lets you edit them in between. That’s a hell lot more functionality than Bill Gates and Paul Allen had when they wrote a BASIC interpreter for the Altair. If you find something you’re happy with, just stick with it but don’t start arguing. It isn’t worth your time. Don’t feed the trolls. When it comes to software, don’t fall into the old FreeBSD vs. Linux vs. Windows vs. 
mum cliche. Instead, talk about your code. Let’s look at your problem-solving skills. Let’s be pragmatic here. Talk is cheap. Show me the code. - Linus Torvalds I don’t care which programming language you are using. Java? Fine. Visual Basic? Great! Scala, Cobol, PHP, C++? All fine. Write in Assembler or lolcode. Don’t moan about the fact that language X is missing feature Y. Write a library or use something different. Stop saying JavaScript is a toy language. It just doesn’t fit your needs. Instead, show me your Lisp adventure game. Write an interpreter for Brainfuck. Do something. Move things. Concerning PHP, nir wrote on Hacker News: Any idiot can write a snarky comment about PHP. Very few get to write code that has anywhere near the impact it had. Will you fall off your chair when I admit that I like the PHP syntax? OK, it has its rough edges (do we really need the $ sign?) but what’s more important is how much I can get done with it. PHP was my long time go-to language for off the hook, one time scripts. It looks a bit ugly but it runs on any server and comes with an enormous amount of built-in functionality. It’s great for rapid prototyping and gluing things together. In fact, when you write a piece of software, what you should strive for is to produce quite good software and what you really need to accomplish is good enough software to make your users happy. Zed A. Shaw puts it quite nicely in the afterword to Learn Python the hard way I have been programming for a very long time. So long that it is incredibly boring to me. At the time that I wrote this book I knew about 20 programming languages and could learn new ones in about a day to a week depending on how weird they were. Eventually though this just became boring and couldn’t hold my interest. What I discovered after this journey of learning was that the languages didn’t matter, it was what you did with them. 
Actually, I always knew that, but I’d get distracted by the languages and forget it periodically. The programming language you learn and use does not matter. Do not get sucked into the religion surrounding programming languages as that will only blind you to their real purpose of being your tool for doing interesting things. Don’t get emotional for any tool you use. An iPhone - I’m sorry to disappoint you - is just a phone. No magic. No “think different”. “But it’s evil!”, the ether says, “it’s not open source”. Well, Android just exists because Google needed to rapidly develop a mobile platform. It’s simply part of their business. There is no moral behind that. Google is a yet another company just like Microsoft or Apple. My MacBook serves me as a solid tool, but if something “better” comes around, I will happily kick it out. I’ve ditched Firefox after five years just because Chrome is faster and I will get rid of Chrome when I find a worthy successor. Vim is quite good in my opinion but if there’s a faster way to do things I’m not afraid to dump it. Instead get your hands dirty and fix the problems or craft something new. " }, { "title": "Are you a Programmer?", "url": "https://endler.dev/2011/are-you-a-programmer/", - "body": "My geography teacher once told the story of her first lecture at University.\nAs an introduction, her professor asked the class to draw\na map of Germany without any help and as accurate as possible. To her surprise, she was not\nable to fill the map with much detail. Even the shape of the country was a bit vague.\nShe had seen thousands of images of Germany (her mother country) but\nwasn't able to reproduce it from her blurry memory. She would have to look it up.\nDoesn't this sound familiar? We rely on machines to manage large portions\nof our knowledge. 
There's hard work involved to learn something by heart.\nHere is a similar test for programmers:\n\nUsing a programming language of your choice, write a correct sorting\nalgorithm with an average runtime complexity of O(n*log n) (Heapsort,\nQuicksort, Bucketsort, you name it) on a piece of paper without the help of any\nexternal tools.\n\nAnd by correct I mean it must be free of bugs without any modifications when you type it in.\nYou would be surprised by the large percentage of professional software\nengineers who can't pull this off.\nSome might argue that knowledge about details of programming language\nsyntax is unimportant: "Why learn all the little nitpicks when you know\nhow to use a search engine? Why start with a clean slate when you can easily\ncopy, paste and modify an example from a tutorial?\nEvery few years/months I have to completely relearn the syntax for a different language anyway."\nBut that is a myth. If you know only\none programming language really well - even if it is something\noutdated like Fortran or COBOL - you could easily earn a fortune with\nthat knowledge. Suppose you started with C in 1975. You could still\nuse the same syntax today - almost four decades later.\nSame for text editors. Emacs and Vim are both decades\nold. They are battle-hardened. I don't care which one you prefer, but you\nwill spend a large part of your life with your tools so invest the time to master them.\nAs a side note, it appears that very few people strive for perfection in anything they do.\nThey happily settle for "good enough". 
This can have many different reasons, and I'm not\nblaming anybody for not doing his homework but maybe I'm not alone with\nthat observation.\nIf you don't know how to use your tools without a manual, you are a lousy craftsman.\nIf you need a dictionary to write a simple letter, you will have a hard\ntime becoming a writer because it would already be challenging for you to form elegant, fluent\nsentences -- let alone engaging and original stories.\nI don't want to read these books.\nWhat makes a programmer?\n\nShe has at least one programming language she knows inside out.\nShe can implement standard algorithms (i.e. for sorting, searching)\nand data-structures (i.e. trees, linked lists) which are robust and\nreasonably fast on the fly.\nShe has at least a basic understanding of complexity theory and\nprogramming concepts like recursion and pointers.\n\nBut, to be a good programmer, you should\n\nBe able to code in at least two fundamentally different programming\nparadigms (i.e. declarative, functional).\nHave experience with big software architectures.\nBe familiar with your programming environment like the operating system and a sophisticated text editor of your choice. Preferably one, that is\neasily extendable.\n\nAnd that is just the tip of the iceberg.\n"There's too much to learn!", I hear some of you say.\nStart slowly.\nYou need only three commands to start with Vim: i, ESC, :wq.\nThat's enough for day one.\nI realize that most of these essentials won't be taught during lectures.\nYou have to learn a vast portion on your own.\nBut let's face it: If you don't know this stuff, you are not a programmer, you're a freshman.\n" + "body": "My geography teacher once told the story of her first lecture at University. As an introduction, her professor asked the class to draw a map of Germany without any help and as accurate as possible. To her surprise, she was not able to fill the map with much detail. Even the shape of the country was a bit vague. 
She had seen thousands of images of Germany (her mother country) but wasn’t able to reproduce it from her blurry memory. She would have to look it up. Doesn’t this sound familiar? We rely on machines to manage large portions of our knowledge. There’s hard work involved to learn something by heart. Here is a similar test for programmers: Using a programming language of your choice, write a correct sorting algorithm with an average runtime complexity of O(n*log n) (Heapsort, Quicksort, Bucketsort, you name it) on a piece of paper without the help of any external tools. And by correct I mean it must be free of bugs without any modifications when you type it in. You would be surprised by the large percentage of professional software engineers who can’t pull this off. Some might argue that knowledge about details of programming language syntax is unimportant: “Why learn all the little nitpicks when you know how to use a search engine? Why start with a clean slate when you can easily copy, paste and modify an example from a tutorial? Every few years&#x2F;months I have to completely relearn the syntax for a different language anyway.” But that is a myth. If you know only one programming language really well - even if it is something outdated like Fortran or COBOL - you could easily earn a fortune with that knowledge. Suppose you started with C in 1975. You could still use the same syntax today - almost four decades later. Same for text editors. Emacs and Vim are both decades old. They are battle-hardened. I don’t care which one you prefer, but you will spend a large part of your life with your tools so invest the time to master them. As a side note, it appears that very few people strive for perfection in anything they do. They happily settle for “good enough”. This can have many different reasons, and I’m not blaming anybody for not doing his homework but maybe I’m not alone with that observation. 
If you don’t know how to use your tools without a manual, you are a lousy craftsman. If you need a dictionary to write a simple letter, you will have a hard time becoming a writer because it would already be challenging for you to form elegant, fluent sentences — let alone engaging and original stories. I don’t want to read these books. What makes a programmer? She has at least one programming language she knows inside out. She can implement standard algorithms (i.e. for sorting, searching) and data-structures (i.e. trees, linked lists) which are robust and reasonably fast on the fly. She has at least a basic understanding of complexity theory and programming concepts like recursion and pointers. But, to be a good programmer, you should Be able to code in at least two fundamentally different programming paradigms (i.e. declarative, functional). Have experience with big software architectures. Be familiar with your programming environment like the operating system and a sophisticated text editor of your choice. Preferably one, that is easily extendable. And that is just the tip of the iceberg. “There’s too much to learn!”, I hear some of you say. Start slowly. You need only three commands to start with Vim: i, ESC, :wq. That’s enough for day one. I realize that most of these essentials won’t be taught during lectures. You have to learn a vast portion on your own. But let’s face it: If you don’t know this stuff, you are not a programmer, you’re a freshman. " }, { "title": "On Hard Work", "url": "https://endler.dev/2011/on-hard-work/", - "body": "Great people get shaped by their achievements\n\nThere's Thomas Edison who developed countless prototypes before selling a single light bulb.\nThe unemployed Joanne K. 
Rowling writing Harry Potter in a Cafe while caring for her child.\nSteve Wozniak creating the first personal computer in his spare time while working at HP.\n\nWhat do they have in common?\nThey all lived through frustration and contempt but still reached their goals, even though the chances for success were\nlow. These people are stemming their strong will from an intrinsic curiosity.\nDedication\nSure, I love what I do. I want to be a programmer for the rest of my life, but sometimes it seems simply too hard to finish a project.\nI get scared by the big picture and fear that I won't finish on time. What I need is a different mindset.\nDhanji R. Prasanna, a former Google Wave team member made this observation\n\nAnd this is the essential broader point--as a programmer you must have a series of wins, every single day. It is the Deus Ex Machina of hacker success. It is what makes you eager for the next feature, and the next after that.\n\nWhile Google Wave has not been commercially successful, it sure was a\ntechnical breakthrough -- and it was a drag to push it out into public.\nWe always have to see our goal right in front of us, as we take a billion baby steps to reach it.\nThis is true for any profession. Winners never give up.\nDirection\nToday it is easier to accomplish something meaningful than ever before.\nIf you are reading this, you have access to a powerful instrument -- a\ncomputer with an Internet connection. We live in a time where a single\nperson can accomplish miracles without hard physical labor.\nA time where billions of people can grow a business from their desk, get famous in minutes,\npublish books in seconds and have instant access to large amounts of\ndata. The most potent development over the last 100\nyears has been the reduction of communication costs. Transferring a bit of\ninformation to the other end of the world is virtually free and takes\nfractions of a second. 
While proper education was a privilege of a lucky few\nwell into the 20th century, learning new things is now mostly a question of\nwill.\nNevertheless, learning is still a tedious task,\nrequiring patience and determination.\nAs the amount of information has increased, so have the ways of distraction.\nLosing focus is just a click away.\nDevotion\nEverybody can start something. Few will finish anything.\nThat's because getting things done is hard, even if you love what\nyou're doing. (Watch the beginnings of There Will Be Blood and Primer for a\ndefinition of hard work.)\nNo matter what they tell you, achieving anything sustainable means hustling. It means making\nsacrifices. It means pushing through.\nIt means selling something even though it isn't perfect. Your beautiful project might turn into an ugly groundhog in\nthe end. Put makeup on it and get it out the door.\nOn a report about Quake's 3D-Engine, developer Michael Abrash says:\n\nBy the end of a project, the design is carved in stone, and most of the work involves fixing bugs, or trying to figure out how to shoehorn in yet another feature that was never planned for in the original design. All that is a lot less fun than starting a project, and often very hard work--but it has to be done before the project can ship. As a former manager of mine liked to say, "After you finish the first 90% of a project, you have to finish the other 90%." It's that second 90% that's the key to success.\n\n\nA lot of programmers get to that second 90%, get tired and bored and frustrated, and change jobs, or lose focus, or find excuses to procrastinate. There are a million ways not to finish a project, but there's only one way to finish: Put your head down and grind it out until it's done. 
Do that, and I promise you the programming world will be yours.\n\nThat last part has influenced me a lot.\nThe dedication, the urgency to reach your aims must come from within you.\nIt's your raw inner voice speaking -- don't let it fade away.\nAnd when you are close to giving up, stop thinking so hard. Just try to\npush forward and make a tiny step in the right direction.\nYou can't lose.\n" + "body": "Great people get shaped by their achievements There’s Thomas Edison who developed countless prototypes before selling a single light bulb. The unemployed Joanne K. Rowling writing Harry Potter in a Cafe while caring for her child. Steve Wozniak creating the first personal computer in his spare time while working at HP. What do they have in common? They all lived through frustration and contempt but still reached their goals, even though the chances for success were low. These people are stemming their strong will from an intrinsic curiosity. Dedication Sure, I love what I do. I want to be a programmer for the rest of my life, but sometimes it seems simply too hard to finish a project. I get scared by the big picture and fear that I won’t finish on time. What I need is a different mindset. Dhanji R. Prasanna, a former Google Wave team member made this observation And this is the essential broader point–as a programmer you must have a series of wins, every single day. It is the Deus Ex Machina of hacker success. It is what makes you eager for the next feature, and the next after that. While Google Wave has not been commercially successful, it sure was a technical breakthrough — and it was a drag to push it out into public. We always have to see our goal right in front of us, as we take a billion baby steps to reach it. This is true for any profession. Winners never give up. Direction Today it is easier to accomplish something meaningful than ever before. If you are reading this, you have access to a powerful instrument — a computer with an Internet connection. 
We live in a time where a single person can accomplish miracles without hard physical labor. A time where billions of people can grow a business from their desk, get famous in minutes, publish books in seconds and have instant access to large amounts of data. The most potent development over the last 100 years has been the reduction of communication costs. Transferring a bit of information to the other end of the world is virtually free and takes fractions of a second. While proper education was a privilege of a lucky few well into the 20th century, learning new things is now mostly a question of will. Nevertheless, learning is still a tedious task, requiring patience and determination. As the amount of information has increased, so have the ways of distraction. Losing focus is just a click away. Devotion Everybody can start something. Few will finish anything. That’s because getting things done is hard, even if you love what you’re doing. (Watch the beginnings of There Will Be Blood and Primer for a definition of hard work.) No matter what they tell you, achieving anything sustainable means hustling. It means making sacrifices. It means pushing through. It means selling something even though it isn’t perfect. Your beautiful project might turn into an ugly groundhog in the end. Put makeup on it and get it out the door. On a report about Quake’s 3D-Engine, developer Michael Abrash says: By the end of a project, the design is carved in stone, and most of the work involves fixing bugs, or trying to figure out how to shoehorn in yet another feature that was never planned for in the original design. All that is a lot less fun than starting a project, and often very hard work–but it has to be done before the project can ship. As a former manager of mine liked to say, “After you finish the first 90% of a project, you have to finish the other 90%.” It’s that second 90% that’s the key to success. 
A lot of programmers get to that second 90%, get tired and bored and frustrated, and change jobs, or lose focus, or find excuses to procrastinate. There are a million ways not to finish a project, but there’s only one way to finish: Put your head down and grind it out until it’s done. Do that, and I promise you the programming world will be yours. That last part has influenced me a lot. The dedication, the urgency to reach your aims must come from within you. It’s your raw inner voice speaking — don’t let it fade away. And when you are close to giving up, stop thinking so hard. Just try to push forward and make a tiny step in the right direction. Ship it! " }, { "title": "Overkill – Java as a First Programming Language", "url": "https://endler.dev/2010/overkill-java-as-a-first-programming-language/", - "body": "I recently talked to a student in my neighborhood about his first programming\nexperiences. They started learning Java at school, and it soon turned out to be\nhorrible.\nA lot of us learned to code in languages like BASIC or Pascal. There was no\nobject orientation, no sophisticated file I/O and almost no\nmodularization... and it was great. In BASIC you could just write\n\nPRINT "HELLO WORLD"\n\nand you were done. This was actually a running program solving a basic and\nreoccurring problem: Output some text on a screen.\nIf you wanted to do the same thing in Java you just write:\n\npublic class Main { \n public static void main (String[] args) {\n System.out.println("Hello, world!"); \n }\n}\n\nDo you see how much knowledge about programming you must have to achieve the\neasiest task one could think of? Describing the program to a novice programmer\nmay sound like this:\n\nCreate a Main class containing a main-method returning void expecting a\nstring array as a single argument using the println method of the out object of\nclass PrintStream passing your text as a single argument.\n\n— please just don't forget your brackets. 
This way your first programming hours are guaranteed to\nbe great fun.\nOK. So what are the alternatives? I admit that nobody wants to write BASIC\nanymore because of its lack of a sophisticated standard library for graphics\n(Java doesn't have one either) and its weak scalability. The language has to\nbe clean and straightforward. It should be fast enough for numerical tasks but not as\nwordy as the rigid C-type bracket languages (sorry C++ guys). It should have a\nsmooth learning curve and provide direct feedback (compiled languages often\nsuck at that point). It should encourage clean code and reward best practices.\nOne language that provides all that is Python.\nAnd Python has even more: hundreds of libraries that help you with almost\neverything, good integration into common IDEs (PyDev in Eclipse, IDLE...), a\nprecise and elegant syntax.\nHere is our program from above written in Python:\n\nprint("Hello World")\n\nThere's no need to know about object orientation, scopes and function\narguments at this point. No householding or book-keeping. Yes, it's an\ninterpreted language, but that's not a deal breaker for beginners.\nIf you aren't convinced yet, printing and formatting text output in Java is\nrelatively easy for an advanced programmer but the gruesome stuff begins\nwith file input:\n\nimport java.io.BufferedReader; \nimport java.io.FileNotFoundException; \nimport java.io.FileReader; \nimport java.io.IOException; \n \npublic class fileIO { \n public static void main(String[] args) { \n String filename = "test.txt", line; \n try { \n BufferedReader myFile = \n new BufferedReader(new FileReader(filename)); \n \n while ( ( line = myFile.readLine()) != null) { \n System.out.println(line);\n } \n } catch (FileNotFoundException e) { \n e.printStackTrace(); \n } catch (IOException e) { \n e.printStackTrace(); \n } \n } \n} \n\nI hear you say: "Dude, file I/O is pretty complex. It's just the way it is".\nThat's true... internally . 
But a beginner should get an easy interface. Python\nshows how it's done:\n\nfile = open("test.txt")\ntext = file.read()\nprint(text);\n\nThe code goes hand in hand with the natural understanding of how the process\nworks: "The computer opens a file, reads it and prints it". Even a five-year-old kid can understand that. Nobody would start to explain: "Before you can\nread a file you need a BufferedReader that works on a FileReader..." even if\nthis is precisely how it works internally. You want to explain the big picture\nat first. The elementary principles of teaching a computer how to do useful\nstuff. Otherwise, you will start frustrating beginners and fool them into\nthinking that they are not bright enough for programming. Programming is fun\nand starting with it is the most crucial step. So don't spoil that\nexperience with layers of unneeded abstraction.\n" + "body": "I recently talked to a student in my neighborhood about his first programming experiences. They started learning Java at school, and it soon turned out to be horrible. A lot of us learned to code in languages like BASIC or Pascal. There was no object orientation, no sophisticated file I&#x2F;O and almost no modularization… and it was great. In BASIC you could just write PRINT &amp;quot;HELLO WORLD&amp;quot; and you were done. This was actually a running program solving a basic and reoccurring problem: Output some text on a screen. If you wanted to do the same thing in Java you just write: public class Main public static void main (String[] args) System.out.println(&amp;quot;Hello, world!&amp;quot;); Do you see how much knowledge about programming you must have to achieve the easiest task one could think of? Describing the program to a novice programmer may sound like this: Create a Main class containing a main-method returning void expecting a string array as a single argument using the println method of the out object of class PrintStream passing your text as a single argument. 
— please just don’t forget your brackets. This way your first programming hours are guaranteed to be great fun. OK. So what are the alternatives? I admit that nobody wants to write BASIC anymore because of its lack of a sophisticated standard library for graphics (Java doesn’t have one either) and its weak scalability. The language has to be clean and straightforward. It should be fast enough for numerical tasks but not as wordy as the rigid C-type bracket languages (sorry C++ guys). It should have a smooth learning curve and provide direct feedback (compiled languages often suck at that point). It should encourage clean code and reward best practices. One language that provides all that is Python. And Python has even more: hundreds of libraries that help you with almost everything, good integration into common IDEs (PyDev in Eclipse, IDLE…), a precise and elegant syntax. Here is our program from above written in Python: print(&amp;quot;Hello World&amp;quot;) There’s no need to know about object orientation, scopes and function arguments at this point. No householding or book-keeping. Yes, it’s an interpreted language, but that’s not a deal breaker for beginners. If you aren’t convinced yet, printing and formatting text output in Java is relatively easy for an advanced programmer but the gruesome stuff begins with file input: import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; public class fileIO public static void main(String[] args) String filename = &amp;quot;test.txt&amp;quot;, line; try BufferedReader myFile = new BufferedReader(new FileReader(filename)); while ( ( line = myFile.readLine()) != null) System.out.println(line); catch (FileNotFoundException e) e.printStackTrace(); catch (IOException e) e.printStackTrace(); I hear you say: “Dude, file I&#x2F;O is pretty complex. It’s just the way it is”. That’s true… internally . But a beginner should get an easy interface. 
Python shows how it’s done: file = open(&amp;quot;test.txt&amp;quot;) text = file.read() print(text); The code goes hand in hand with the natural understanding of how the process works: “The computer opens a file, reads it and prints it”. Even a five-year-old kid can understand that. Nobody would start to explain: “Before you can read a file you need a BufferedReader that works on a FileReader…” even if this is precisely how it works internally. You want to explain the big picture at first. The elementary principles of teaching a computer how to do useful stuff. Otherwise, you will start frustrating beginners and fool them into thinking that they are not bright enough for programming. Programming is fun and starting with it is the most crucial step. So don’t spoil that experience with layers of unneeded abstraction. Links Response to this article (almost ten years later): Why Kotlin may be better than Java and Python as the first programming language " }, { "title": "Howto Sort a Vector or a List in C++ using STL", "url": "https://endler.dev/2010/howto-sort-a-vector-or-a-list-in-c-using-stl/", - "body": "A little code snippet that people need very often.\n\n\n/*\n* Howto sort a vector or a list in C++ using STL\n*/\n\n#include <algorithm> // Needed for sort() method\n#include <vector> // STL vector class\n#include <list> // STL list class\n#include <iostream> // Needed for cout,endl\n\nusing namespace std; // Save us some typing\n\n/*\n* This is a comparison function. It can be used to tell sort()\n* how to order the elements in our container (the vector or list).\n* You can write a comparator for every data type (i.e. 
double, string...).\n*/\nbool comp(const int& num1, const int& num2) {\n return num1 > num2;\n}\n\nint main() {\n // SORTING WITH VECTORS //\n\n // A vector containing integers\n vector<int> v;\n\n // Insert some values\n v.push_back(5);\n v.push_back(12);\n v.push_back(1);\n\n // The generic STL sort function uses three parameters:\n // \n // v.begin() Iterator pointing at the _beginning_ of the container\n // v.end() Iterator pointing at the _end_ of it\n // comp [Optional] A comparison function (see above)\n // \n // The above mentioned iterators must be random access iterators because\n // sort() takes advantage of clever tricks that require direct access to\n // all elements of the vector. This makes it really fast.\n // (Currently introsort is used with O(n*log n) even in worst case).\n\n sort(v.begin(), v.end(), comp);\n\n cout << "Vector: ";\n\n // Iterate over vector elements\n vector<int>::iterator vIt;\n for (vIt = v.begin(); vIt != v.end(); vIt++) {\n // Print current element to standard output\n cout << *vIt << " ";\n }\n cout << endl;\n\n // SORTING WITH LISTS //\n // A list containing integers\n list<int> l;\n\n // Insert some values\n l.push_back(5);\n l.push_back(12);\n l.push_back(1);\n\n // Here is the major difference between vectors and lists in general:\n // Vectors offer fast random access to every element\n // but inserting a new element at the beginning or in the middle is slow.\n // On the other hand inserting into a list is fast but searching for\n // a specific element is slow.\n //\n // Vectors behave much like an array, while lists only allow slow sequential access.\n // Therefore we need a different function to sort all elements that does\n // not need random access iterators.\n // \n // comp [Optional] A comparison function (see above)\n // \n // Note that sort() is specific for the list and is implemented as a\n // member function of list<>. 
This is feels more object oriented than the vector.\n \n l.sort(comp);\n\n cout << "List: ";\n\n // A pointer to a list element\n list<int>::iterator lIt;\n for (lIt = l.begin(); lIt != l.end(); lIt++) {\n cout << *lIt << " ";\n }\n cout << endl;\n\n return 0;\n}\nCompilation and execution\nSave the above code inside a file, e.g. list_vector.cpp and compile it like so:\n\nclang++ list_vector.cpp\n\nTo run it, execute the resulting binary.\n\n./a.out\nProgram output\n\nVector: 12 5 1\nList: 12 5 1\n" + "body": "A little code snippet that people need very often. &#x2F;* * Howto sort a vector or a list in C++ using STL *&#x2F; #include &amp;lt;algorithm&amp;gt; &#x2F;&#x2F; Needed for sort() method #include &amp;lt;vector&amp;gt; &#x2F;&#x2F; STL vector class #include &amp;lt;list&amp;gt; &#x2F;&#x2F; STL list class #include &amp;lt;iostream&amp;gt; &#x2F;&#x2F; Needed for cout,endl using namespace std; &#x2F;&#x2F; Save us some typing &#x2F;* * This is a comparison function. It can be used to tell sort() * how to order the elements in our container (the vector or list). * You can write a comparator for every data type (i.e. double, string...). *&#x2F; bool comp(const int&amp;amp; num1, const int&amp;amp; num2) return num1 &amp;gt; num2; int main() &#x2F;&#x2F; SORTING WITH VECTORS &#x2F;&#x2F; &#x2F;&#x2F; A vector containing integers vector&amp;lt;int&amp;gt; v; &#x2F;&#x2F; Insert some values v.push_back(5); v.push_back(12); v.push_back(1); &#x2F;&#x2F; The generic STL sort function uses three parameters: &#x2F;&#x2F; &#x2F;&#x2F; v.begin() Iterator pointing at the _beginning_ of the container &#x2F;&#x2F; v.end() Iterator pointing at the _end_ of it &#x2F;&#x2F; comp [Optional] A comparison function (see above) &#x2F;&#x2F; &#x2F;&#x2F; The above mentioned iterators must be random access iterators because &#x2F;&#x2F; sort() takes advantage of clever tricks that require direct access to &#x2F;&#x2F; all elements of the vector. This makes it really fast. 
&#x2F;&#x2F; (Currently introsort is used with O(n*log n) even in worst case). sort(v.begin(), v.end(), comp); cout &amp;lt;&amp;lt; &amp;quot;Vector: &amp;quot;; &#x2F;&#x2F; Iterate over vector elements vector&amp;lt;int&amp;gt;::iterator vIt; for (vIt = v.begin(); vIt != v.end(); vIt++) &#x2F;&#x2F; Print current element to standard output cout &amp;lt;&amp;lt; *vIt &amp;lt;&amp;lt; &amp;quot; &amp;quot;; cout &amp;lt;&amp;lt; endl; &#x2F;&#x2F; SORTING WITH LISTS &#x2F;&#x2F; &#x2F;&#x2F; A list containing integers list&amp;lt;int&amp;gt; l; &#x2F;&#x2F; Insert some values l.push_back(5); l.push_back(12); l.push_back(1); &#x2F;&#x2F; Here is the major difference between vectors and lists in general: &#x2F;&#x2F; Vectors offer fast random access to every element &#x2F;&#x2F; but inserting a new element at the beginning or in the middle is slow. &#x2F;&#x2F; On the other hand inserting into a list is fast but searching for &#x2F;&#x2F; a specific element is slow. &#x2F;&#x2F; &#x2F;&#x2F; Vectors behave much like an array, while lists only allow slow sequential access. &#x2F;&#x2F; Therefore we need a different function to sort all elements that does &#x2F;&#x2F; not need random access iterators. &#x2F;&#x2F; &#x2F;&#x2F; comp [Optional] A comparison function (see above) &#x2F;&#x2F; &#x2F;&#x2F; Note that sort() is specific for the list and is implemented as a &#x2F;&#x2F; member function of list&amp;lt;&amp;gt;. This is feels more object oriented than the vector. l.sort(comp); cout &amp;lt;&amp;lt; &amp;quot;List: &amp;quot;; &#x2F;&#x2F; A pointer to a list element list&amp;lt;int&amp;gt;::iterator lIt; for (lIt = l.begin(); lIt != l.end(); lIt++) cout &amp;lt;&amp;lt; *lIt &amp;lt;&amp;lt; &amp;quot; &amp;quot;; cout &amp;lt;&amp;lt; endl; return 0; Compilation and execution Save the above code inside a file, e.g. list_vector.cpp and compile it like so: clang++ list_vector.cpp To run it, execute the resulting binary. 
.&#x2F;a.out Program output Vector: 12 5 1 List: 12 5 1 " }, { "title": "Why I Love Text Files", "url": "https://endler.dev/2010/why-i-love-text-files/", - "body": "Text files are the single most important way we can communicate with computers. It's no coincidence that they are also the most vital way to interact with other human beings. What we can achieve with text files is invaluable: Write it once and refer to it whenever you want to get the message across in the future. Write a program (it's just text), save it and let the machine execute it whenever you like. Write another text file which contains the rules for the execution of your program and the computer runs your application exactly as you specified (cron files do that on Unix).\nText files can be structured in any way you can imagine. Some flavours are JSON, Markdown and SVG. It's all just text. There exist a billion of programs and algorithms to access, modify and distribute text files. You can write them with Emacs, print them on a terminal, pipe them through sed and send them via email to a friend who publishes them on the web. Because text files are so important we have good support for them on any computing system. On Unix, everything is a file and HTML is just structured text. It's a simple and powerful tool to make a contribution to society that outlasts our lives. \nI have a single text file in my mac dock bar which is called TODO.txt. I open it every day, and after years of experimenting with different task management apps from simple command line tools to sophisticated online information storage systems, I always came back to plain text files. And the explanation is simple: If humanity will still be around a thousand years from now, chances are that plain text files are one of the very few file formats that will still be readable.\nThey are an incremental part of how we can modify our environment without even leaving our desk. 
They have no overhead and can contain a single thought or the complete knowledge of our species. Distributing textual information is so vital for us that we permanently develop faster distribution networks – the fastest by now being the internet.\nOn the web, you have instant access to a virtually endless amount of information and data distributed as plain text files. New web services made accessing the data even easier offering APIs and feeds. You can pull down the data from their servers and make statistics with a programming language of your choice. As you may have noticed, my affinity to text files partially comes from my programming background. As Matt Might correctly points out on his blog: \n\nThe continued dominance of the command line among experts is a testament to the power of linguistic abstraction: when it comes to computing, a word is worth a thousand pictures.\n\nWhenever you like a text on the web, just link to it and create a wonderful chain of ideas. Want to read it later or recommend it to a friend? Just share the text or print it on paper. The fact that we all take such things for granted is a testament for the power of text files and their importance for the information age.\nLinks\n\nUnix Text Processing (PDF)\n\n" + "body": "Text files are the single most important way we can communicate with computers. It’s no coincidence that they are also the most vital way to interact with other human beings. What we can achieve with text files is invaluable: Write it once and refer to it whenever you want to get the message across in the future. Write a program (it’s just text), save it and let the machine execute it whenever you like. Write another text file which contains the rules for the execution of your program and the computer runs your application exactly as you specified (cron files do that on Unix). Text files can be structured in any way you can imagine. Some flavours are JSON, Markdown and SVG. It’s all just text. 
There exist a billion of programs and algorithms to access, modify and distribute text files. You can write them with Emacs, print them on a terminal, pipe them through sed and send them via email to a friend who publishes them on the web. Because text files are so important we have good support for them on any computing system. On Unix, everything is a file and HTML is just structured text. It’s a simple and powerful tool to make a contribution to society that outlasts our lives. I have a single text file in my mac dock bar which is called TODO.txt. I open it every day, and after years of experimenting with different task management apps from simple command line tools to sophisticated online information storage systems, I always come back to plain text files. And the explanation is simple: If humanity will still be around a thousand years from now, chances are that plain text files are one of the very few file formats that will still be readable. Text files are an incremental part of how we can modify our environment without even leaving our desk. They have no overhead and can contain a single thought or the complete knowledge of our species. Distributing textual information is so vital for us that we permanently develop faster distribution networks – the fastest by now being the internet. On the web, you have instant access to a virtually endless amount of information and data distributed as plain text files. New web services made accessing the data even easier, offering APIs and feeds. You can pull down the data from their servers and make statistics with a programming language of your choice. As you may have noticed, my affinity to text files partially comes from my programming background. As Matt Might correctly points out on his blog: The continued dominance of the command line among experts is a testament to the power of linguistic abstraction: when it comes to computing, a word is worth a thousand pictures. 
Whenever you like a text on the web, just link to it and create a wonderful chain of ideas. Want to read it later or recommend it to a friend? Just share the text or print it on paper. The fact that we all take such things for granted is a testament to the power of text files and their importance for the information age. " }, { "title": "Running Legacy Code", "url": "https://endler.dev/2009/running-legacy-code/", - "body": "This short article deals with a severe problem in software development: bit rot.\nWhen switching to a new platform (for instance from Windows XP to Windows Vista/7), the programmers need to make sure that old bits of code run flawlessly. There are several ways to achieve this goal that will be discussed in the next paragraphs:\nPorting the code\nThis is generally considered a hard path to follow. For non-trivial legacy code-blocks, chances are high that they contain side-effects and hacks to make them work in different environments. Porting code means replacing parts of the program that use functions and methods that don't exist anymore with new ones which make use of the modern libraries  and routines of the new platform. The significant advantages are maintainable software and sometimes faster running programs. But it may be needed to hack the new platform libraries in order to preserve the whole functionality of an old application. When changing an algorithm inside legacy code, the ported version may become unstable. Thus there may be better ways of maintaining obsolete code today.\nEmulators\nEmulators work much the same like porting the code. You replace old function calls with new ones to make everything work again. However you don't alter the old codebase itself (because you may not have the source code available) but you create a new compatibility layer that "translates" the communication between the underlying operating system and software (our new platform) and our old software. 
Emulation can also be very fast and run stable for many years but writing an emulator can be even harder than porting the code because an educational guess may be needed to figure out how the program works internally. Additionally, the emulator itself may become obsolete in the future and might eventually  be replaced by a new one.\nVirtual machines\nDuring the last years, a new approach was gaining popularity. The idea is simple: Don't touch anything. Take the whole platform and copy it in order to run old software. The old software runs on top of the old operating system within a virtual machine that runs on the new platform. \nFrom a sane software developers view, this method is ridiculous. A lot of resources are wasted along the way. The system is busier switching contexts from an old platform to the new one and back than running the actual legacy program. However, with cheap and capable hardware everywhere this idea gets more and more interesting. As Steve Atwood coined it:\n\nAlways try to spend your way out of a performance problem first by throwing faster hardware at it.\n\nAnd he's right. The Microsoft developers did the same on their new NT 6.0 platform (Vista, Windows 7, Windows Server 2008...): Windows XP is running on a virtual machine. This way everything behaves just like one would run the software on the old system. And by optimizing the performance bottlenecks (input/output, context switches), one gets a fast and stable, easy to maintain product.\nEvery method has its major advantages and disadvantages. It's on the developer to select the appropriate strategy.\n" + "body": "This short article deals with a severe problem in software development: bit rot. When switching to a new platform (for instance from Windows XP to Windows Vista&#x2F;7), the programmers need to make sure that old bits of code run flawlessly. 
There are several ways to achieve this goal that will be discussed in the next paragraphs: Porting the code This is generally considered a hard path to follow. For non-trivial legacy code-blocks, chances are high that they contain side-effects and hacks to make them work in different environments. Porting code means replacing parts of the program that use functions and methods that don’t exist anymore with new ones which make use of the modern libraries  and routines of the new platform. The significant advantages are maintainable software and sometimes faster running programs. But it may be needed to hack the new platform libraries in order to preserve the whole functionality of an old application. When changing an algorithm inside legacy code, the ported version may become unstable. Thus there may be better ways of maintaining obsolete code today. Emulators Emulators work much the same as porting the code. You replace old function calls with new ones to make everything work again. However you don’t alter the old codebase itself (because you may not have the source code available) but you create a new compatibility layer that “translates” the communication between the underlying operating system and software (our new platform) and our old software. Emulation can also be very fast and run stable for many years but writing an emulator can be even harder than porting the code because an educated guess may be needed to figure out how the program works internally. Additionally, the emulator itself may become obsolete in the future and might eventually  be replaced by a new one. Virtual machines During the last years, a new approach was gaining popularity. The idea is simple: Don’t touch anything. Take the whole platform and copy it in order to run old software. The old software runs on top of the old operating system within a virtual machine that runs on the new platform. From a sane software developer’s view, this method is ridiculous. 
A lot of resources are wasted along the way. The system is busier switching contexts from an old platform to the new one and back than running the actual legacy program. However, with cheap and capable hardware everywhere this idea gets more and more interesting. As Steve Atwood coined it: Always try to spend your way out of a performance problem first by throwing faster hardware at it. And he’s right. The Microsoft developers did the same on their new NT 6.0 platform (Vista, Windows 7, Windows Server 2008…): Windows XP is running on a virtual machine. This way everything behaves just like one would run the software on the old system. And by optimizing the performance bottlenecks (input&#x2F;output, context switches), one gets a fast and stable, easy to maintain product. Every method has its major advantages and disadvantages. It’s on the developer to select the appropriate strategy. " } -] +] \ No newline at end of file From f18c4566e250200e9b50bc8ffacfb934e1cfb32c Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 18 Aug 2025 15:29:37 +0200 Subject: [PATCH 53/58] Replace `Cargo_orig.toml` with `Cargo.toml.template` for asset inclusion --- assets/crate/{Cargo_orig.toml => Cargo.toml.template} | 0 src/bin/utils/assets.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename assets/crate/{Cargo_orig.toml => Cargo.toml.template} (100%) diff --git a/assets/crate/Cargo_orig.toml b/assets/crate/Cargo.toml.template similarity index 100% rename from assets/crate/Cargo_orig.toml rename to assets/crate/Cargo.toml.template diff --git a/src/bin/utils/assets.rs b/src/bin/utils/assets.rs index 9f37450..47ab33b 100644 --- a/src/bin/utils/assets.rs +++ b/src/bin/utils/assets.rs @@ -1,6 +1,6 @@ pub static CRATE_CARGO_TOML: &str = include_str!(concat!( env!("CARGO_MANIFEST_DIR"), - "/assets/crate/Cargo_orig.toml" + "/assets/crate/Cargo.toml.template" )); pub static CRATE_LIB_RS: &str = include_str!(concat!( env!("CARGO_MANIFEST_DIR"), From 
93c2b1560a73295bf5bcdc45777bc51a9ef779fe Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 18 Aug 2025 16:02:13 +0200 Subject: [PATCH 54/58] Move howtos to example folder --- README.md | 2 +- howto/hugo.md => examples/hugo/README.md | 0 howto/pelican.md => examples/pelican/README.md | 0 howto/zola.md => examples/zola/README.md | 8 ++++---- 4 files changed, 5 insertions(+), 5 deletions(-) rename howto/hugo.md => examples/hugo/README.md (100%) rename howto/pelican.md => examples/pelican/README.md (100%) rename howto/zola.md => examples/zola/README.md (88%) diff --git a/README.md b/README.md index 739fb6b..51fba4f 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ make demo This will generate WASM files and start a local server. Open http://localhost:8000/demo/ to try it out. -You can also take a look at the code examples for different static site generators [here](https://github.com/mre/tinysearch/tree/master/howto). +You can also take a look at the code examples for different static site generators [here](https://github.com/mre/tinysearch/tree/master/examples). 
## Advanced Usage diff --git a/howto/hugo.md b/examples/hugo/README.md similarity index 100% rename from howto/hugo.md rename to examples/hugo/README.md diff --git a/howto/pelican.md b/examples/pelican/README.md similarity index 100% rename from howto/pelican.md rename to examples/pelican/README.md diff --git a/howto/zola.md b/examples/zola/README.md similarity index 88% rename from howto/zola.md rename to examples/zola/README.md index 79bd112..18aa048 100644 --- a/howto/zola.md +++ b/examples/zola/README.md @@ -49,9 +49,10 @@ date = 2025-01-01 ``` **Important notes:** -- The `path` parameter determines the output URL (`/tinysearch.json`) +- The `path` parameter determines the output URL (`tinysearch.json`) - The `template` parameter specifies which template to use -- The file extension in `path` doesn't affect the actual content type +- The `date` field is required to avoid build warnings +- **About the weird path**: Zola will create `public/tinysearch.json/index.html` instead of `public/tinysearch.json` due to how it handles URLs. This is normal Zola behavior - just ignore the strange nested structure. ## Step 3: Build and Process @@ -61,7 +62,7 @@ date = 2025-01-01 ``` 2. **Find the generated JSON:** - The search index will be at `public/tinysearch.json/index.html` + The search index will be at `public/tinysearch.json/index.html` (yes, that's a weird path, but it's how Zola works) 3. **Run tinysearch:** ```bash @@ -104,7 +105,6 @@ You can extend the macro to include additional metadata: - Test the generated JSON with a validator ### Build Errors -- Make sure the `tinysearch_macros.html` file is in the `templates/` directory - Check that all template syntax is correct (Tera uses `{%` and `{{` syntax) This setup will create a comprehensive search index that tinysearch can process into an efficient WebAssembly search module for your Zola site. 
From a97e50f7602392eea2a601edebf495564ad48128 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 25 Aug 2025 16:34:09 +0200 Subject: [PATCH 55/58] Use `include!` for JS glue code loader script --- assets/tinysearch_loader.js | 72 ++++++++++++++++++++++++++++++++++ src/bin/tinysearch.rs | 78 +------------------------------------ src/bin/utils/assets.rs | 5 +++ 3 files changed, 78 insertions(+), 77 deletions(-) create mode 100644 assets/tinysearch_loader.js diff --git a/assets/tinysearch_loader.js b/assets/tinysearch_loader.js new file mode 100644 index 0000000..9c47bcf --- /dev/null +++ b/assets/tinysearch_loader.js @@ -0,0 +1,72 @@ +class TinySearchWasm { + constructor(wasmInstance) { + this.wasm = wasmInstance; + this.memory = wasmInstance.exports.memory; + this.searchFn = wasmInstance.exports.search; + this.freeFn = wasmInstance.exports.free_search_result; + } + + // Convert JS string to WASM memory + stringToWasm(str) { + const bytes = new TextEncoder().encode(str + '\0'); + const ptr = this.wasm.exports.malloc ? 
this.wasm.exports.malloc(bytes.length) : this.allocString(bytes.length); + const mem = new Uint8Array(this.memory.buffer, ptr, bytes.length); + mem.set(bytes); + return ptr; + } + + // Read string from WASM memory + wasmToString(ptr) { + if (ptr === 0) return null; + const mem = new Uint8Array(this.memory.buffer); + let end = ptr; + while (mem[end] !== 0) end++; + return new TextDecoder().decode(mem.subarray(ptr, end)); + } + + // Simple string allocation fallback + allocString(len) { + // This is a simple fallback - WASM linear memory grows as needed + const pages = Math.ceil(len / 65536); + this.memory.grow(pages); + return this.memory.buffer.byteLength - len; + } + + // Perform search + search(query, numResults = 5) { + const queryPtr = this.stringToWasm(query); + const resultPtr = this.searchFn(queryPtr, numResults); + + if (resultPtr === 0) { + return []; + } + + const jsonStr = this.wasmToString(resultPtr); + this.freeFn(resultPtr); + + try { + return JSON.parse(jsonStr); + } catch (e) { + console.error('Failed to parse search results:', e); + return []; + } + } +} + +export async function init_tinysearch() { + try { + // Try streaming first (preferred) + const wasmModule = await WebAssembly.instantiateStreaming(fetch('./{WASM_FILE}')); + return new TinySearchWasm(wasmModule.instance); + } catch (e) { + console.warn('Streaming failed, falling back to fetch + instantiate:', e.message); + // Fallback for servers with wrong MIME type + const response = await fetch('./{WASM_FILE}'); + const wasmBytes = await response.arrayBuffer(); + const wasmModule = await WebAssembly.instantiate(wasmBytes); + return new TinySearchWasm(wasmModule.instance); + } +} + +// Backward compatibility +export { TinySearchWasm as TinySearch }; \ No newline at end of file diff --git a/src/bin/tinysearch.rs b/src/bin/tinysearch.rs index eb5ee1a..68b9f44 100644 --- a/src/bin/tinysearch.rs +++ b/src/bin/tinysearch.rs @@ -366,83 +366,7 @@ impl Stage for Wasm { })?; // Generate simple JS 
loader - let js_content = format!( - r#" -class TinySearchWasm {{ - constructor(wasmInstance) {{ - this.wasm = wasmInstance; - this.memory = wasmInstance.exports.memory; - this.searchFn = wasmInstance.exports.search; - this.freeFn = wasmInstance.exports.free_search_result; - }} - - // Convert JS string to WASM memory - stringToWasm(str) {{ - const bytes = new TextEncoder().encode(str + '\0'); - const ptr = this.wasm.exports.malloc ? this.wasm.exports.malloc(bytes.length) : this.allocString(bytes.length); - const mem = new Uint8Array(this.memory.buffer, ptr, bytes.length); - mem.set(bytes); - return ptr; - }} - - // Read string from WASM memory - wasmToString(ptr) {{ - if (ptr === 0) return null; - const mem = new Uint8Array(this.memory.buffer); - let end = ptr; - while (mem[end] !== 0) end++; - return new TextDecoder().decode(mem.subarray(ptr, end)); - }} - - // Simple string allocation fallback - allocString(len) {{ - // This is a simple fallback - WASM linear memory grows as needed - const pages = Math.ceil(len / 65536); - this.memory.grow(pages); - return this.memory.buffer.byteLength - len; - }} - - // Perform search - search(query, numResults = 5) {{ - const queryPtr = this.stringToWasm(query); - const resultPtr = this.searchFn(queryPtr, numResults); - - if (resultPtr === 0) {{ - return []; - }} - - const jsonStr = this.wasmToString(resultPtr); - this.freeFn(resultPtr); - - try {{ - return JSON.parse(jsonStr); - }} catch (e) {{ - console.error('Failed to parse search results:', e); - return []; - }} - }} -}} - -export async function init_tinysearch() {{ - try {{ - // Try streaming first (preferred) - const wasmModule = await WebAssembly.instantiateStreaming(fetch('./{wasm_file}')); - return new TinySearchWasm(wasmModule.instance); - }} catch (e) {{ - console.warn('Streaming failed, falling back to fetch + instantiate:', e.message); - // Fallback for servers with wrong MIME type - const response = await fetch('./{wasm_file}'); - const wasmBytes = await 
response.arrayBuffer(); - const wasmModule = await WebAssembly.instantiate(wasmBytes); - return new TinySearchWasm(wasmModule.instance); - }} -}} - -// Backward compatibility -export {{ TinySearchWasm as TinySearch }}; -"#, - wasm_file = wasm_file - ); + let js_content = assets::JS_LOADER.replace("{WASM_FILE}", &wasm_file); let js_path = self.out_path.join(format!("{}.js", &wasm_name)); if !self.release { diff --git a/src/bin/utils/assets.rs b/src/bin/utils/assets.rs index 47ab33b..71a2ad2 100644 --- a/src/bin/utils/assets.rs +++ b/src/bin/utils/assets.rs @@ -12,3 +12,8 @@ pub static DEMO_HTML: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/ pub static STOP_WORDS: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/assets/stopwords")); + +pub static JS_LOADER: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/tinysearch_loader.js" +)); From 645bde0226522da11d7170b48b718720b31d96bb Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Sat, 30 Aug 2025 00:43:36 +0200 Subject: [PATCH 56/58] Make fields configurable with tinysearch.toml (#181) Introduces a `tinysearch.toml`, which can be used to fully customize the indexed fields. 
## Example For an e-commerce site where you want to search product titles and descriptions but also store metadata like prices and image URLs: ```toml [schema] indexed_fields = ["title", "description", "category", "tags"] metadata_fields = ["price", "image_url", "brand", "availability"] url_field = "product_url" ``` JSON structure: ```json [ { "title": "Wireless Headphones", "description": "High-quality wireless headphones with noise cancellation", "category": "Electronics", "tags": "audio headphones wireless bluetooth", "product_url": "https://store.example.com/headphones-123", "price": "$199.99", "image_url": "https://store.example.com/images/headphones.jpg", "brand": "TechAudio", "availability": "In Stock" } ] ``` Closes #116 --- Cargo.lock | 27 +++ Cargo.toml | 2 + README.md | 70 ++++++++ examples/README.md | 100 +++++++++++ examples/blog/README.md | 64 +++++++ examples/blog/posts.json | 122 +++++++++++++ examples/blog/tinysearch.toml | 11 ++ examples/documentation/README.md | 73 ++++++++ examples/documentation/docs.json | 134 +++++++++++++++ examples/documentation/tinysearch.toml | 11 ++ examples/ecommerce/README.md | 59 +++++++ examples/ecommerce/products.json | 132 ++++++++++++++ examples/ecommerce/tinysearch.toml | 11 ++ examples/index.json | 35 ++++ examples/tinysearch.toml | 16 ++ src/bin/tinysearch.rs | 26 ++- src/bin/utils/index.rs | 7 +- src/bin/utils/storage.rs | 227 ++++++++++++++++++++++++- src/lib.rs | 207 ++++++++++++++++++++++ tests/integration_test.rs | 208 ++++++++++++++++++++++ tinysearch.toml | 10 ++ 21 files changed, 1532 insertions(+), 20 deletions(-) create mode 100644 examples/README.md create mode 100644 examples/blog/README.md create mode 100644 examples/blog/posts.json create mode 100644 examples/blog/tinysearch.toml create mode 100644 examples/documentation/README.md create mode 100644 examples/documentation/docs.json create mode 100644 examples/documentation/tinysearch.toml create mode 100644 examples/ecommerce/README.md create mode 
100644 examples/ecommerce/products.json create mode 100644 examples/ecommerce/tinysearch.toml create mode 100644 examples/index.json create mode 100644 examples/tinysearch.toml create mode 100644 tinysearch.toml diff --git a/Cargo.lock b/Cargo.lock index 29e769d..777942e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -267,6 +267,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "strip_markdown" version = "0.2.0" @@ -336,15 +345,31 @@ dependencies = [ "strip_markdown", "strum", "tempfile", + "toml", "toml_edit", "xorf", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + [[package]] name = "toml_datetime" version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] [[package]] name = "toml_edit" @@ -353,6 +378,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", + "serde", + "serde_spanned", "toml_datetime", "toml_write", "winnow", diff --git a/Cargo.toml b/Cargo.toml index ccae25d..793fa25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ tempfile = { version = "3.14.0", optional = true } strip_markdown = { version = "0.2.0", optional = true } strum = { version = "0.26.3", features = ["derive"], optional = true } +toml = { version = "0.8.19", optional = true } toml_edit = { version = "0.22.22", optional = true } @@ -53,6 +54,7 @@ bin = [ 
"tempfile", "strip_markdown", "strum", + "toml", "toml_edit", ] diff --git a/README.md b/README.md index 51fba4f..6eba841 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,24 @@ Please take a look at the [example file](fixtures/index.json). ℹ️ The `body` field in the JSON document is optional and can be skipped to just index post titles. +### Configuration + +You can customize which fields are indexed and which are stored as metadata using a `tinysearch.toml` configuration file. Place this file in the same directory as your JSON index file. + +```toml +[schema] +# Fields that will be indexed for full-text search +indexed_fields = ["title", "body", "description"] + +# Fields that will be stored as metadata but not indexed +metadata_fields = ["author", "date", "category", "image_url"] + +# Field that contains the URL for each document +url_field = "url" +``` + +If no configuration file is found, tinysearch will use the default schema (indexing `title` and `body` fields with `url` as the URL field). + Once you created the index, you can generate a WebAssembly search engine: ```sh @@ -101,6 +119,58 @@ This will generate WASM files and start a local server. Open http://localhost:80 You can also take a look at the code examples for different static site generators [here](https://github.com/mre/tinysearch/tree/master/examples). 
+### Configuration Examples + +#### E-commerce Site with Product Metadata + +For an e-commerce site where you want to search product titles and descriptions but also store metadata like prices and image URLs: + +```toml +[schema] +indexed_fields = ["title", "description", "category", "tags"] +metadata_fields = ["price", "image_url", "brand", "availability"] +url_field = "product_url" +``` + +JSON structure: +```json +[ + { + "title": "Wireless Headphones", + "description": "High-quality wireless headphones with noise cancellation", + "category": "Electronics", + "tags": "audio headphones wireless bluetooth", + "product_url": "https://store.example.com/headphones-123", + "price": "$199.99", + "image_url": "https://store.example.com/images/headphones.jpg", + "brand": "TechAudio", + "availability": "In Stock" + } +] +``` + +#### Blog with Author and Date Information + +For a blog where you want to search titles and content but also store author and publication metadata: + +```toml +[schema] +indexed_fields = ["title", "body", "excerpt"] +metadata_fields = ["author", "publish_date", "tags", "featured_image"] +url_field = "permalink" +``` + +#### Documentation Site + +For a documentation site where you want extensive search across multiple content types: + +```toml +[schema] +indexed_fields = ["title", "content", "section", "keywords"] +metadata_fields = ["version", "last_updated", "contributor"] +url_field = "doc_url" +``` + ## Advanced Usage For advanced usage options, run diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..c00c736 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,100 @@ +# TinySearch Examples + +This directory contains comprehensive examples demonstrating TinySearch usage for different types of websites and applications. + +## Available Examples + +### 🛒 [E-commerce](./ecommerce/) +Product catalog search with pricing, availability, and category metadata. 
+- **Data**: 10 sample products across multiple categories +- **Fields**: Product names, descriptions, categories, tags, pricing, availability +- **Use Case**: Online stores, marketplaces, product directories + +### 📝 [Blog](./blog/) +Technical blog search with author information and content categorization. +- **Data**: 10 technical articles on programming and web development +- **Fields**: Titles, content, excerpts, tags, authors, publication dates +- **Use Case**: Personal blogs, corporate blogs, technical documentation + +### 📚 [Documentation](./documentation/) +Comprehensive documentation search with version control and organization. +- **Data**: 11 documentation pages covering features, guides, and references +- **Fields**: Titles, content, sections, keywords, versions, contributors +- **Use Case**: Software documentation, API docs, knowledge bases + +## Quick Start + +Each example directory contains: +- `tinysearch.toml` - Configuration file defining the search schema +- `*.json` - Sample data file with realistic content +- `README.md` - Detailed instructions and usage examples + +To try any example: + +```bash +cd examples/[example-name] +tinysearch -m storage -p ./output [data-file].json +tinysearch -m search -S "your query" -N 5 ./output/storage +``` + +## Schema Customization + +Each example demonstrates different schema configurations: + +| Example | Indexed Fields | Metadata Fields | URL Field | +|---------|---------------|----------------|-----------| +| E-commerce | product_name, description, category, tags | price, brand, availability, rating | product_url | +| Blog | title, content, excerpt, tags | author, publish_date, category, reading_time | permalink | +| Documentation | title, content, section, keywords | version, last_updated, contributor, difficulty | doc_url | + +## Integration Examples + +### Static Site Generators + +All examples work with popular static site generators: + +- **Jekyll**: Use liquid templates to generate JSON from post 
frontmatter +- **Hugo**: Leverage JSON output formats for automatic index generation +- **Gatsby**: Generate indices programmatically during build process +- **Next.js**: Create JSON during static generation phase + +### Web Integration + +Generate WASM files for browser deployment: + +```bash +# Development with demo interface +tinysearch -m wasm -p ./wasm_output [data-file].json + +# Production deployment +tinysearch --release -m wasm -p ./wasm_output [data-file].json +``` + +## Performance Characteristics + +| Example | Index Size | Search Time | Memory Usage | +|---------|------------|-------------|--------------| +| E-commerce (10 products) | ~8KB | <1ms | ~2MB | +| Blog (10 posts) | ~12KB | <1ms | ~3MB | +| Documentation (11 pages) | ~15KB | <1ms | ~4MB | + +*Measurements approximate, actual results vary by content and browser* + +## Best Practices + +1. **Field Selection**: Only index fields users will search; use metadata for display-only data +2. **Content Optimization**: Remove HTML tags and minimize unnecessary text +3. **Keyword Strategy**: Include relevant keywords and synonyms in indexed fields +4. **File Size**: Consider splitting large datasets into multiple indices +5. **WASM Optimization**: Use `--optimize` flag for production deployments + +## Contributing Examples + +To contribute new examples: +1. Create a new directory with a descriptive name +2. Include `tinysearch.toml`, sample JSON data, and README.md +3. Ensure data is realistic and demonstrates clear use cases +4. Test all commands mentioned in the README +5. Submit a pull request with your example + +Examples should showcase different industries, content types, or technical approaches to help users understand TinySearch's flexibility. 
\ No newline at end of file diff --git a/examples/blog/README.md b/examples/blog/README.md new file mode 100644 index 0000000..1faeaed --- /dev/null +++ b/examples/blog/README.md @@ -0,0 +1,64 @@ +# Blog Search Example + +This example demonstrates how to use TinySearch for a technical blog with rich content and metadata. + +## Configuration + +The `tinysearch.toml` file configures the search to: + +- **Index**: `title`, `content`, `excerpt`, and `tags` for comprehensive search coverage +- **Store as metadata**: `author`, `publish_date`, `category`, `reading_time`, `featured_image` +- **Use**: `permalink` as the link field for each blog post + +## Sample Data + +The `posts.json` file contains 10 technical blog posts covering: +- Programming languages (Rust, JavaScript) +- Web development (frameworks, CSS, performance) +- Backend development (databases, APIs) +- Cloud computing and architecture +- DevOps (Git workflows) +- Security best practices + +Each post includes realistic content, author information, publication dates, and categorization. 
+ +## Usage + +From this directory, run: + +```bash +# Generate the search index +tinysearch -m storage -p ./output posts.json + +# Search for programming topics +tinysearch -m search -S "rust programming" -N 3 ./output/storage + +# Search for web development content +tinysearch -m search -S "javascript frameworks" -N 5 ./output/storage + +# Find security-related posts +tinysearch -m search -S "security vulnerabilities" -N 2 ./output/storage +``` + +## For Blog Integration + +Generate WASM files for your blog: + +```bash +# Development version with demo +tinysearch -m wasm -p ./wasm_output posts.json + +# Production version for deployment +tinysearch --release -m wasm -p ./wasm_output posts.json +``` + +## Search Examples + +Readers can search for: +- **Technologies**: "rust", "javascript", "css", "database" +- **Topics**: "performance", "security", "architecture", "optimization" +- **Categories**: "frontend", "backend", "devops", "cloud" +- **Authors**: "Sarah Chen", "Alex Rodriguez" (only if `author` is added to `indexed_fields`; in the configuration above it is a metadata field and is not searched) +- **Concepts**: "microservices", "responsive design", "git workflows" + +Search results include author names, publication dates, reading times, and categories to help readers find exactly what they're looking for. \ No newline at end of file diff --git a/examples/blog/posts.json b/examples/blog/posts.json new file mode 100644 index 0000000..c43a507 --- /dev/null +++ b/examples/blog/posts.json @@ -0,0 +1,122 @@ +[ + { + "title": "Getting Started with Rust: A Complete Guide", + "content": "Rust has emerged as one of the most beloved programming languages, combining the performance of systems languages like C++ with the safety and ergonomics of modern languages. In this comprehensive guide, we'll explore why Rust is gaining popularity and how you can start your journey with this powerful language. 
Rust's unique ownership system prevents common bugs like null pointer dereferences and buffer overflows at compile time. 
The language excels in systems programming, web development, and even WebAssembly applications. We'll cover setting up your development environment, understanding the borrow checker, and building your first Rust application.", + "excerpt": "Discover why Rust is revolutionizing systems programming with its unique approach to memory safety and zero-cost abstractions.", + "tags": "rust programming systems-programming memory-safety tutorial beginners", + "permalink": "https://blog.example.com/getting-started-with-rust", + "author": "Sarah Chen", + "publish_date": "2024-01-15", + "category": "Programming", + "reading_time": "12 min read", + "featured_image": "https://blog.example.com/images/rust-guide.jpg" + }, + { + "title": "Building Modern Web Applications with JavaScript Frameworks", + "content": "The JavaScript ecosystem has evolved tremendously over the past decade. Today's developers have access to powerful frameworks like React, Vue.js, and Svelte that make building complex web applications more manageable and enjoyable. This article explores the current state of JavaScript frameworks, comparing their strengths and use cases. We'll dive into component-based architecture, state management patterns, and modern development workflows. Whether you're building a simple landing page or a complex single-page application, choosing the right framework is crucial for project success. 
We'll also cover emerging trends like server-side rendering, static site generation, and the growing importance of web performance.", + "excerpt": "An in-depth comparison of modern JavaScript frameworks and how to choose the right one for your next project.", + "tags": "javascript react vue svelte web-development frontend frameworks spa", + "permalink": "https://blog.example.com/modern-javascript-frameworks", + "author": "Alex Rodriguez", + "publish_date": "2024-01-20", + "category": "Web Development", + "reading_time": "8 min read", + "featured_image": "https://blog.example.com/images/js-frameworks.jpg" + }, + { + "title": "Database Design Best Practices for Scalable Applications", + "content": "Proper database design is the foundation of any scalable application. Poor design decisions made early in development can lead to performance bottlenecks, data inconsistencies, and maintenance nightmares as your application grows. In this comprehensive guide, we'll explore fundamental principles of database design, from normalization and denormalization trade-offs to indexing strategies and query optimization. We'll cover relational database concepts, NoSQL alternatives, and hybrid approaches. Key topics include entity-relationship modeling, choosing appropriate data types, designing efficient schemas, and planning for future growth. 
Real-world examples demonstrate how these principles apply to different application types, from e-commerce platforms to content management systems.", + "excerpt": "Learn essential database design principles that will save you from performance and scalability issues down the road.", + "tags": "database design sql nosql scalability performance optimization schema", + "permalink": "https://blog.example.com/database-design-best-practices", + "author": "Maria Gonzalez", + "publish_date": "2024-01-25", + "category": "Backend Development", + "reading_time": "15 min read", + "featured_image": "https://blog.example.com/images/database-design.jpg" + }, + { + "title": "The Rise of WebAssembly: Beyond JavaScript Performance", + "content": "WebAssembly (WASM) is transforming what's possible in web browsers by allowing developers to run high-performance code written in languages like Rust, C++, and Go directly in the browser. Originally designed to address JavaScript's performance limitations, WebAssembly has evolved into a versatile platform for bringing desktop-class applications to the web. This article explores WASM's architecture, its growing ecosystem, and practical use cases. We'll examine how companies are using WebAssembly for everything from image processing and games to CAD applications and scientific computing. The integration with JavaScript remains seamless, allowing developers to use WASM modules alongside existing web technologies. 
We'll also discuss the future of WebAssembly, including WASI and its potential beyond browsers.", + "excerpt": "Explore how WebAssembly is enabling new categories of web applications with near-native performance.", + "tags": "webassembly wasm performance rust cpp web-development browser-technology", + "permalink": "https://blog.example.com/rise-of-webassembly", + "author": "David Kim", + "publish_date": "2024-02-01", + "category": "Web Development", + "reading_time": "10 min read", + "featured_image": "https://blog.example.com/images/webassembly.jpg" + }, + { + "title": "Mastering Git: Advanced Workflows for Development Teams", + "content": "Git is more than just version control—it's a powerful tool for coordinating complex software development workflows. While most developers are comfortable with basic Git commands, mastering advanced techniques can significantly improve team productivity and code quality. This comprehensive guide covers advanced Git workflows including GitFlow, GitHub Flow, and custom branching strategies. We'll explore interactive rebasing, cherry-picking, and conflict resolution strategies. Advanced topics include submodules, hooks, and automation techniques that can streamline your development process. 
Whether you're working on open source projects or enterprise applications, understanding these Git patterns will make you a more effective developer and team member.", + "excerpt": "Level up your Git skills with advanced workflows, branching strategies, and collaboration techniques.", + "tags": "git version-control workflow development-teams collaboration branching", + "permalink": "https://blog.example.com/mastering-git-workflows", + "author": "Jennifer Liu", + "publish_date": "2024-02-05", + "category": "DevOps", + "reading_time": "14 min read", + "featured_image": "https://blog.example.com/images/git-workflows.jpg" + }, + { + "title": "Cloud Architecture Patterns for Resilient Systems", + "content": "Building resilient systems in the cloud requires understanding architectural patterns that can handle failures gracefully. Modern cloud applications must be designed for inevitable failures, network partitions, and scaling demands. This article explores proven patterns like Circuit Breaker, Bulkhead, and Retry with exponential backoff. We'll examine microservices architecture, event-driven design, and strategies for achieving high availability. Key topics include load balancing, caching strategies, database replication, and monitoring. Real-world case studies demonstrate how companies like Netflix and Amazon implement these patterns at scale. 
We'll also cover newer concepts like chaos engineering and observability-driven development that are becoming essential for cloud-native applications.", + "excerpt": "Discover architectural patterns that help build robust, scalable cloud applications that gracefully handle failures.", + "tags": "cloud-architecture microservices resilience scalability aws azure patterns", + "permalink": "https://blog.example.com/cloud-architecture-patterns", + "author": "Michael Torres", + "publish_date": "2024-02-10", + "category": "Cloud Computing", + "reading_time": "16 min read", + "featured_image": "https://blog.example.com/images/cloud-architecture.jpg" + }, + { + "title": "API Design: RESTful Services vs GraphQL vs gRPC", + "content": "API design decisions significantly impact application performance, developer experience, and long-term maintainability. With multiple API paradigms available—REST, GraphQL, and gRPC—choosing the right approach for your project can be challenging. This comprehensive comparison examines each approach's strengths, trade-offs, and ideal use cases. REST remains popular for its simplicity and cacheability, while GraphQL offers flexible data fetching and strong typing. gRPC excels in microservices communication with its efficiency and code generation capabilities. We'll explore practical considerations like tooling, ecosystem support, and team expertise. 
Real examples demonstrate implementation patterns, authentication strategies, and versioning approaches for each API style.", + "excerpt": "Navigate the API landscape with a detailed comparison of REST, GraphQL, and gRPC approaches.", + "tags": "api-design rest graphql grpc web-services backend microservices", + "permalink": "https://blog.example.com/api-design-comparison", + "author": "Sophie Anderson", + "publish_date": "2024-02-15", + "category": "Backend Development", + "reading_time": "11 min read", + "featured_image": "https://blog.example.com/images/api-design.jpg" + }, + { + "title": "Modern CSS: Grid, Flexbox, and Container Queries", + "content": "CSS has evolved dramatically in recent years, introducing powerful layout systems and responsive design capabilities that were previously impossible or required complex JavaScript solutions. CSS Grid and Flexbox have revolutionized how we approach layout design, while newer features like Container Queries are changing responsive design paradigms. This article provides a comprehensive guide to modern CSS techniques, covering when to use Grid vs Flexbox, creating complex layouts with minimal code, and building truly responsive components. We'll explore practical examples ranging from simple card layouts to complex magazine-style designs. 
Advanced topics include CSS custom properties, logical properties, and the latest selector features that make CSS more maintainable and powerful.", + "excerpt": "Master modern CSS techniques including Grid, Flexbox, and the latest responsive design patterns.", + "tags": "css grid flexbox responsive-design container-queries modern-css frontend", + "permalink": "https://blog.example.com/modern-css-techniques", + "author": "Emma Thompson", + "publish_date": "2024-02-20", + "category": "Frontend Development", + "reading_time": "9 min read", + "featured_image": "https://blog.example.com/images/modern-css.jpg" + }, + { + "title": "Security Best Practices for Web Applications", + "content": "Web application security is more critical than ever as cyber threats continue to evolve. Developers must understand common vulnerabilities and implement robust security measures from the ground up. This comprehensive guide covers the OWASP Top 10 vulnerabilities and practical mitigation strategies. Key topics include input validation, authentication systems, authorization patterns, and secure session management. We'll explore modern security headers, Content Security Policy implementation, and protection against common attacks like SQL injection, XSS, and CSRF. The article also covers secure coding practices, dependency management, and security testing approaches. 
Real-world examples demonstrate how security breaches occur and how proper implementation prevents them.", + "excerpt": "Protect your web applications with essential security practices and defense strategies against common threats.", + "tags": "web-security owasp authentication authorization xss csrf sql-injection", + "permalink": "https://blog.example.com/web-application-security", + "author": "Carlos Mendez", + "publish_date": "2024-02-25", + "category": "Security", + "reading_time": "13 min read", + "featured_image": "https://blog.example.com/images/web-security.jpg" + }, + { + "title": "Performance Optimization Techniques for Modern Web Apps", + "content": "Web performance directly impacts user experience, conversion rates, and search engine rankings. As applications become more complex, developers must employ sophisticated optimization techniques to maintain fast loading times and smooth interactions. This article covers comprehensive performance optimization strategies, from initial page load to runtime performance. We'll explore code splitting, lazy loading, image optimization, and caching strategies. Advanced topics include service workers, web workers, and progressive enhancement techniques. The guide includes practical tools for measuring performance, identifying bottlenecks, and monitoring real user metrics. 
Modern frameworks offer built-in optimizations, but understanding the underlying principles helps developers make informed decisions and create consistently fast applications.", + "excerpt": "Comprehensive guide to web performance optimization covering loading strategies, runtime optimization, and monitoring techniques.", + "tags": "web-performance optimization loading-speed caching service-workers metrics", + "permalink": "https://blog.example.com/web-performance-optimization", + "author": "Ryan Park", + "publish_date": "2024-03-01", + "category": "Frontend Development", + "reading_time": "17 min read", + "featured_image": "https://blog.example.com/images/performance-optimization.jpg" + } +] \ No newline at end of file diff --git a/examples/blog/tinysearch.toml b/examples/blog/tinysearch.toml new file mode 100644 index 0000000..697c196 --- /dev/null +++ b/examples/blog/tinysearch.toml @@ -0,0 +1,11 @@ +[schema] +# Fields that will be indexed for full-text search +# Readers can search by title, content, excerpt, and tags +indexed_fields = ["title", "content", "excerpt", "tags"] + +# Fields that will be stored as metadata but not indexed +# These provide context in search results +metadata_fields = ["author", "publish_date", "category", "reading_time", "featured_image"] + +# Field that contains the URL for each blog post +url_field = "permalink" \ No newline at end of file diff --git a/examples/documentation/README.md b/examples/documentation/README.md new file mode 100644 index 0000000..dad45e1 --- /dev/null +++ b/examples/documentation/README.md @@ -0,0 +1,73 @@ +# Documentation Search Example + +This example demonstrates how to use TinySearch for a comprehensive documentation website with organized content and metadata. 
+ +## Configuration + +The `tinysearch.toml` file configures the search to: + +- **Index**: `title`, `content`, `section`, and `keywords` for thorough documentation coverage +- **Store as metadata**: `version`, `last_updated`, `contributor`, `difficulty`, `type` +- **Use**: `doc_url` as the link field for each documentation page + +## Sample Data + +The `docs.json` file contains 11 documentation pages covering: +- **Getting Started**: Installation and quickstart guide +- **Configuration**: Schema setup and customization options +- **Reference**: JSON format, CLI commands, API documentation +- **Integration**: WebAssembly, static site generators, performance +- **Support**: Troubleshooting, examples, contributing guidelines + +Each page includes realistic technical content, version information, difficulty levels, and contributor details. + +## Usage + +From this directory, run: + +```bash +# Generate the search index +tinysearch -m storage -p ./output docs.json + +# Search for configuration help +tinysearch -m search -S "configuration schema" -N 3 ./output/storage + +# Find integration guides +tinysearch -m search -S "webassembly integration" -N 2 ./output/storage + +# Look for troubleshooting info +tinysearch -m search -S "errors troubleshooting" -N 5 ./output/storage +``` + +## For Documentation Sites + +Generate WASM files for your docs: + +```bash +# Development version with demo +tinysearch -m wasm -p ./wasm_output docs.json + +# Production version for deployment +tinysearch --release -m wasm -p ./wasm_output docs.json +``` + +## Search Examples + +Users can search for: +- **Features**: "configuration", "webassembly", "json format", "cli commands" +- **Sections**: "getting started", "api reference", "troubleshooting" +- **Difficulty** (only if you add `difficulty` to `indexed_fields`; it is metadata-only in this example's `tinysearch.toml`): "beginner", "intermediate", "advanced" +- **Content Types**: "guide", "reference", "examples" +- **Technical Terms**: "wasm", "javascript", "integration", "performance" +- **Topics**: "installation", "optimization", "static site 
generators" + +Search results include version numbers, last updated dates, difficulty levels, and content types to help users find the most relevant and up-to-date information for their needs. + +## Documentation Features + +This example showcases TinySearch capabilities for documentation sites: +- **Multi-level organization** with sections and subsections +- **Version tracking** for maintaining multiple doc versions +- **Contributor attribution** for community-driven documentation +- **Content classification** by difficulty and type +- **Comprehensive keyword indexing** for precise search results \ No newline at end of file diff --git a/examples/documentation/docs.json b/examples/documentation/docs.json new file mode 100644 index 0000000..f3f3374 --- /dev/null +++ b/examples/documentation/docs.json @@ -0,0 +1,134 @@ +[ + { + "title": "Getting Started Guide", + "content": "Welcome to TinySearch! This guide will help you get up and running with TinySearch for your static website. TinySearch is a lightweight, fast, full-text search engine written in Rust and compiled to WebAssembly. It's designed specifically for static websites where you need client-side search functionality without requiring a backend server. The entire search index is embedded in a small WASM file that runs directly in the browser. Installation is straightforward using cargo, and the generated output can be integrated into any static site generator like Jekyll, Hugo, or Gatsby.", + "section": "Introduction", + "keywords": "installation setup quickstart beginner guide static-website", + "doc_url": "https://docs.example.com/getting-started", + "version": "v0.9.0", + "last_updated": "2024-03-01", + "contributor": "TinySearch Team", + "difficulty": "Beginner", + "type": "Guide" + }, + { + "title": "Configuration Reference", + "content": "TinySearch can be configured using a tinysearch.toml file to customize which JSON fields are indexed versus stored as metadata. 
The schema section allows you to specify indexed_fields for full-text search, metadata_fields for display-only data, and url_field for the link destination. This flexible configuration system allows TinySearch to work with any JSON structure, from simple blog posts to complex e-commerce catalogs. Field mapping is automatic, and the system intelligently handles different JSON value types including strings, numbers, booleans, and arrays. Advanced configuration options include custom stopwords, stemming rules, and index optimization settings.", + "section": "Configuration", + "keywords": "tinysearch.toml schema configuration json fields indexed metadata", + "doc_url": "https://docs.example.com/configuration", + "version": "v0.9.0", + "last_updated": "2024-02-28", + "contributor": "Sarah Chen", + "difficulty": "Intermediate", + "type": "Reference" + }, + { + "title": "JSON Input Format", + "content": "TinySearch expects input data as a JSON array of objects, where each object represents a searchable document. The structure is flexible - you can include any fields you need. Common fields include title, body or content, url, author, date, and category, but you can use custom field names that match your data structure. The tinysearch.toml configuration file determines which fields get indexed for searching versus stored for display. Arrays of strings are automatically concatenated for indexing. Nested objects are flattened, and null values are handled gracefully. 
The JSON format supports standard data types and UTF-8 encoding for international content.", + "section": "Input Format", + "keywords": "json input format structure fields documents array objects", + "doc_url": "https://docs.example.com/json-format", + "version": "v0.9.0", + "last_updated": "2024-02-25", + "contributor": "Alex Rodriguez", + "difficulty": "Beginner", + "type": "Reference" + }, + { + "title": "CLI Commands and Options", + "content": "The TinySearch command-line interface provides several modes for different use cases. Storage mode generates the search index from your JSON input. Search mode allows testing queries against a generated index. WASM mode creates WebAssembly files for browser deployment, including JavaScript glue code and an optional demo HTML file. Additional options include --release for production builds without demo files, --optimize for size optimization using wasm-opt, and --engine-version for specifying the TinySearch library version. Path options control output directories, and verbose logging helps debug configuration issues.", + "section": "Command Line", + "keywords": "cli commands options storage wasm search release optimize flags", + "doc_url": "https://docs.example.com/cli-reference", + "version": "v0.9.0", + "last_updated": "2024-02-20", + "contributor": "Maria Gonzalez", + "difficulty": "Intermediate", + "type": "Reference" + }, + { + "title": "WebAssembly Integration", + "content": "TinySearch generates WebAssembly modules that can be integrated into any website. The WASM file contains the search index and search algorithms, while the JavaScript glue code provides a simple API for performing searches and handling results. Integration involves loading the WASM module, initializing the search engine, and calling search functions with query strings. Results are returned as JSON arrays with title, URL, and metadata for each match. The system supports asynchronous loading, error handling, and memory management. 
Performance is excellent with typical search times under 1ms for most queries.", + "section": "Integration", + "keywords": "webassembly wasm integration javascript api browser client-side", + "doc_url": "https://docs.example.com/wasm-integration", + "version": "v0.9.0", + "last_updated": "2024-02-18", + "contributor": "David Kim", + "difficulty": "Advanced", + "type": "Guide" + }, + { + "title": "Performance Optimization", + "content": "TinySearch is designed for excellent performance, but several optimization techniques can improve search speed and reduce file size. Index size is primarily determined by content volume and vocabulary diversity. Using focused keywords and removing stopwords reduces index size. The --optimize flag enables wasm-opt compression, typically reducing file size by 20-30%. Content preprocessing like markdown stripping and normalization improves search relevance. For large datasets, consider splitting into multiple search indices or implementing progressive loading. Memory usage scales with index size, and the search algorithm is optimized for minimal allocations during query execution.", + "section": "Performance", + "keywords": "optimization performance speed file-size memory wasm-opt compression", + "doc_url": "https://docs.example.com/performance", + "version": "v0.9.0", + "last_updated": "2024-02-15", + "contributor": "Jennifer Liu", + "difficulty": "Advanced", + "type": "Guide" + }, + { + "title": "Static Site Generator Integration", + "content": "TinySearch integrates seamlessly with popular static site generators including Jekyll, Hugo, Zola, Gatsby, and Next.js. The typical workflow involves generating a JSON index during the build process, running TinySearch to create WASM files, and including the search interface in your theme. Jekyll users can use liquid templates to generate the JSON index from post frontmatter and content. Hugo supports JSON output formats for automatic index generation. 
Modern frameworks like Gatsby and Next.js can generate indices programmatically during build time. Examples and starter templates are available for each platform, demonstrating best practices for integration and styling.", + "section": "Integration", + "keywords": "static-site-generators jekyll hugo gatsby nextjs zola build-process", + "doc_url": "https://docs.example.com/ssg-integration", + "version": "v0.9.0", + "last_updated": "2024-02-12", + "contributor": "Sophie Anderson", + "difficulty": "Intermediate", + "type": "Guide" + }, + { + "title": "Troubleshooting Guide", + "content": "Common issues and solutions for TinySearch deployment and usage. MIME type errors when loading WASM files can be resolved by configuring your web server to serve .wasm files with the correct application/wasm content type. Large index files may cause memory issues in browsers - consider splitting content or using pagination. Search relevance can be improved by adjusting indexed fields and using more descriptive keywords. Build errors often relate to JSON formatting or configuration syntax. Network errors during WASM loading need proper CORS headers and HTTPS for production deployment. Debug mode provides detailed logging for diagnosing issues.", + "section": "Troubleshooting", + "keywords": "troubleshooting issues errors mime-type memory cors https debugging", + "doc_url": "https://docs.example.com/troubleshooting", + "version": "v0.9.0", + "last_updated": "2024-02-10", + "contributor": "Michael Torres", + "difficulty": "Intermediate", + "type": "Guide" + }, + { + "title": "API Reference", + "content": "Complete JavaScript API reference for TinySearch WebAssembly modules. The main entry point is init_tinysearch() which loads and initializes the WASM module asynchronously. The TinySearchWasm class provides search(query, numResults) method for executing queries. Results include title, url, and metadata fields as configured in your schema. 
Error handling covers WASM loading failures, invalid queries, and memory issues. Advanced methods include setStopwords() for custom word filtering and getStats() for index information. The API is promise-based and compatible with modern JavaScript environments including browsers and Node.js.", + "section": "API Reference", + "keywords": "api reference javascript methods functions init search results promises", + "doc_url": "https://docs.example.com/api-reference", + "version": "v0.9.0", + "last_updated": "2024-02-08", + "contributor": "Emma Thompson", + "difficulty": "Advanced", + "type": "Reference" + }, + { + "title": "Examples and Templates", + "content": "Ready-to-use examples and templates for common TinySearch use cases. The examples directory includes complete implementations for e-commerce product search, blog post search, and documentation search. Each example includes sample data, configuration files, and integration code. Templates are provided for popular frameworks and styling approaches. The e-commerce example demonstrates product catalogs with price and availability metadata. The blog example shows author and category filtering. Documentation examples include version and section organization. All examples include responsive design and accessibility considerations.", + "section": "Examples", + "keywords": "examples templates ecommerce blog documentation samples starter-code", + "doc_url": "https://docs.example.com/examples", + "version": "v0.9.0", + "last_updated": "2024-02-05", + "contributor": "Carlos Mendez", + "difficulty": "Beginner", + "type": "Examples" + }, + { + "title": "Contributing Guidelines", + "content": "Guidelines for contributing to TinySearch development including code style, testing requirements, and pull request process. The project welcomes contributions ranging from bug fixes to new features. Development setup requires Rust, wasm-pack, and Node.js for testing. 
Code follows standard Rust conventions with clippy linting and rustfmt formatting. All new features need comprehensive tests including unit tests and integration tests. Documentation updates are required for API changes. The review process includes automated testing, manual review, and performance benchmarking. Issues and feature requests are managed through GitHub with clear templates and labeling.", + "section": "Contributing", + "keywords": "contributing development code-style testing pull-requests github rust", + "doc_url": "https://docs.example.com/contributing", + "version": "v0.9.0", + "last_updated": "2024-02-01", + "contributor": "Ryan Park", + "difficulty": "Advanced", + "type": "Guide" + } +] \ No newline at end of file diff --git a/examples/documentation/tinysearch.toml b/examples/documentation/tinysearch.toml new file mode 100644 index 0000000..9aa7025 --- /dev/null +++ b/examples/documentation/tinysearch.toml @@ -0,0 +1,11 @@ +[schema] +# Fields that will be indexed for full-text search +# Users can search across titles, content, sections, and keywords +indexed_fields = ["title", "content", "section", "keywords"] + +# Fields that will be stored as metadata but not indexed +# These provide context and navigation information +metadata_fields = ["version", "last_updated", "contributor", "difficulty", "type"] + +# Field that contains the URL for each documentation page +url_field = "doc_url" \ No newline at end of file diff --git a/examples/ecommerce/README.md b/examples/ecommerce/README.md new file mode 100644 index 0000000..57d732e --- /dev/null +++ b/examples/ecommerce/README.md @@ -0,0 +1,59 @@ +# E-commerce Search Example + +This example demonstrates how to use TinySearch for an e-commerce website with product data. 
+ +## Configuration + +The `tinysearch.toml` file configures the search to: + +- **Index**: `product_name`, `description`, `category`, and `tags` for full-text search +- **Store as metadata**: `price`, `image_url`, `brand`, `availability`, `rating`, `reviews_count` +- **Use**: `product_url` as the link field for each product + +## Sample Data + +The `products.json` file contains 10 sample products across different categories: +- Electronics (headphones, speakers, webcams) +- Gaming (keyboards) +- Accessories (charging pads, USB hubs) +- Wearables (smartwatches) +- Furniture (office chairs) +- Lighting (desk lamps) +- Storage (external drives) + +## Usage + +From this directory, run: + +```bash +# Generate the search index +tinysearch -m storage -p ./output products.json + +# Test searching for "wireless" +tinysearch -m search -S "wireless" -N 5 ./output/storage + +# Test searching for "gaming" +tinysearch -m search -S "gaming" -N 3 ./output/storage +``` + +## For Web Integration + +Generate WASM files for browser use: + +```bash +# Development version with demo +tinysearch -m wasm -p ./wasm_output products.json + +# Production version (no demo files) +tinysearch --release -m wasm -p ./wasm_output products.json +``` + +## Search Examples + +Users can search for: +- Product names: "headphones", "keyboard", "webcam" +- Categories: "electronics", "gaming", "furniture" +- Features: "wireless", "waterproof", "rgb", "4k" +- Use cases: "office", "gaming", "travel", "outdoor" + +The search results will include product metadata like price, brand, and availability for display in your e-commerce interface. \ No newline at end of file diff --git a/examples/ecommerce/products.json b/examples/ecommerce/products.json new file mode 100644 index 0000000..059dbdb --- /dev/null +++ b/examples/ecommerce/products.json @@ -0,0 +1,132 @@ +[ + { + "product_name": "Wireless Noise-Cancelling Headphones", + "description": "Premium over-ear headphones with active noise cancellation technology. 
Features 30-hour battery life, premium leather ear cups, and crystal-clear audio quality. Perfect for travel, work, or enjoying your favorite music.", + "category": "Electronics", + "tags": "headphones wireless bluetooth noise-cancelling audio music travel premium", + "product_url": "https://store.example.com/wireless-headphones-anc", + "price": "$299.99", + "image_url": "https://store.example.com/images/headphones-wireless.jpg", + "brand": "AudioTech Pro", + "availability": "In Stock", + "rating": "4.8", + "reviews_count": "1,247" + }, + { + "product_name": "Portable Waterproof Bluetooth Speaker", + "description": "Compact speaker with 360-degree surround sound and IPX7 waterproof rating. Perfect for outdoor adventures, beach trips, and pool parties. Connects to any Bluetooth device with 20+ hour battery life.", + "category": "Electronics", + "tags": "speaker bluetooth portable waterproof outdoor beach pool music", + "product_url": "https://store.example.com/bluetooth-speaker-waterproof", + "price": "$149.99", + "image_url": "https://store.example.com/images/speaker-waterproof.jpg", + "brand": "SoundWave", + "availability": "In Stock", + "rating": "4.6", + "reviews_count": "892" + }, + { + "product_name": "Gaming Mechanical Keyboard RGB", + "description": "High-performance mechanical keyboard with customizable RGB backlighting. Cherry MX Blue switches provide tactile feedback perfect for gaming and typing. Programmable macro keys and aluminum frame construction.", + "category": "Gaming", + "tags": "keyboard mechanical gaming rgb cherry-mx switches backlight programmable", + "product_url": "https://store.example.com/gaming-keyboard-rgb", + "price": "$179.99", + "image_url": "https://store.example.com/images/keyboard-gaming.jpg", + "brand": "GameMaster", + "availability": "In Stock", + "rating": "4.7", + "reviews_count": "634" + }, + { + "product_name": "4K Ultra HD Webcam", + "description": "Professional-grade webcam with 4K resolution at 30fps. 
Auto-focus, built-in noise-cancelling microphone, and wide-angle lens. Ideal for video conferencing, streaming, and content creation.", + "category": "Electronics", + "tags": "webcam 4k video camera streaming conference professional microphone", + "product_url": "https://store.example.com/webcam-4k-professional", + "price": "$199.99", + "image_url": "https://store.example.com/images/webcam-4k.jpg", + "brand": "StreamVision", + "availability": "Limited Stock", + "rating": "4.5", + "reviews_count": "423" + }, + { + "product_name": "Wireless Charging Pad Fast Charge", + "description": "Qi-certified wireless charging pad with fast charging support up to 15W. Compatible with iPhone, Samsung, and other Qi-enabled devices. Sleek design with LED charging indicator and overheat protection.", + "category": "Accessories", + "tags": "wireless charging pad qi fast-charge iphone samsung phone accessories", + "product_url": "https://store.example.com/wireless-charging-pad", + "price": "$39.99", + "image_url": "https://store.example.com/images/charging-pad.jpg", + "brand": "PowerFlow", + "availability": "In Stock", + "rating": "4.4", + "reviews_count": "1,156" + }, + { + "product_name": "Smartwatch Fitness Tracker", + "description": "Advanced smartwatch with heart rate monitoring, GPS tracking, and 7-day battery life. Track your workouts, monitor sleep patterns, receive notifications, and stay connected. 
Water-resistant design perfect for active lifestyles.", + "category": "Wearables", + "tags": "smartwatch fitness tracker heart-rate gps health notifications water-resistant", + "product_url": "https://store.example.com/smartwatch-fitness", + "price": "$249.99", + "image_url": "https://store.example.com/images/smartwatch.jpg", + "brand": "FitTech", + "availability": "In Stock", + "rating": "4.3", + "reviews_count": "728" + }, + { + "product_name": "USB-C Hub Multi-Port Adapter", + "description": "7-in-1 USB-C hub with 4K HDMI, USB 3.0 ports, SD card reader, and 100W power delivery pass-through. Aluminum construction with plug-and-play setup. Perfect for MacBook, laptop, and tablet connectivity.", + "category": "Accessories", + "tags": "usb-c hub adapter hdmi ports sd-card power-delivery macbook laptop", + "product_url": "https://store.example.com/usb-c-hub-adapter", + "price": "$79.99", + "image_url": "https://store.example.com/images/usb-hub.jpg", + "brand": "ConnectPro", + "availability": "In Stock", + "rating": "4.6", + "reviews_count": "945" + }, + { + "product_name": "Ergonomic Office Chair", + "description": "Premium ergonomic office chair with lumbar support, adjustable armrests, and breathable mesh back. 360-degree swivel with smooth-rolling casters. Supports up to 300lbs with 5-year warranty.", + "category": "Furniture", + "tags": "office chair ergonomic lumbar support adjustable mesh comfortable workspace", + "product_url": "https://store.example.com/ergonomic-office-chair", + "price": "$399.99", + "image_url": "https://store.example.com/images/office-chair.jpg", + "brand": "WorkComfort", + "availability": "In Stock", + "rating": "4.7", + "reviews_count": "512" + }, + { + "product_name": "LED Desk Lamp Adjustable", + "description": "Modern LED desk lamp with touch controls and adjustable brightness levels. Eye-caring LED technology reduces eye strain. 
Flexible arm and head positioning with USB charging port built-in.", + "category": "Lighting", + "tags": "desk lamp led adjustable touch-control eye-care usb charging modern", + "product_url": "https://store.example.com/led-desk-lamp", + "price": "$89.99", + "image_url": "https://store.example.com/images/desk-lamp.jpg", + "brand": "LightWorks", + "availability": "In Stock", + "rating": "4.5", + "reviews_count": "367" + }, + { + "product_name": "Portable SSD External Drive 1TB", + "description": "Ultra-fast portable SSD with 1TB storage capacity and USB 3.2 Gen 2 interface. Transfer speeds up to 1,050 MB/s with password protection and 256-bit AES encryption. Compact aluminum design.", + "category": "Storage", + "tags": "ssd external drive storage portable usb fast encryption backup 1tb", + "product_url": "https://store.example.com/portable-ssd-1tb", + "price": "$159.99", + "image_url": "https://store.example.com/images/portable-ssd.jpg", + "brand": "DataVault", + "availability": "In Stock", + "rating": "4.8", + "reviews_count": "1,089" + } +] \ No newline at end of file diff --git a/examples/ecommerce/tinysearch.toml b/examples/ecommerce/tinysearch.toml new file mode 100644 index 0000000..ef9fec0 --- /dev/null +++ b/examples/ecommerce/tinysearch.toml @@ -0,0 +1,11 @@ +[schema] +# Fields that will be indexed for full-text search +# Users can search by product name, description, category, and tags +indexed_fields = ["product_name", "description", "category", "tags"] + +# Fields that will be stored as metadata but not indexed +# These are displayed in results but not searchable +metadata_fields = ["price", "image_url", "brand", "availability", "rating", "reviews_count"] + +# Field that contains the URL for each product +url_field = "product_url" \ No newline at end of file diff --git a/examples/index.json b/examples/index.json new file mode 100644 index 0000000..92b0852 --- /dev/null +++ b/examples/index.json @@ -0,0 +1,35 @@ +[ + { + "title": "Getting Started with 
Rust", + "body": "Rust is a systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.", + "description": "A comprehensive introduction to Rust programming", + "tags": "rust programming tutorial beginner", + "url": "https://example.com/rust-intro", + "author": "Jane Developer", + "date": "2024-01-15", + "category": "Programming", + "image_url": "https://example.com/images/rust.jpg" + }, + { + "title": "Web Development with JavaScript", + "body": "JavaScript is the language of the web. Learn how to build interactive websites and web applications.", + "description": "Modern JavaScript development techniques and frameworks", + "tags": "javascript web development frontend", + "url": "https://example.com/js-guide", + "author": "John WebDev", + "date": "2024-01-20", + "category": "Web Development", + "image_url": "https://example.com/images/js.jpg" + }, + { + "title": "Database Design Principles", + "body": "Good database design is crucial for application performance and maintainability.", + "description": "Essential principles for designing efficient databases", + "tags": "database design sql principles", + "url": "https://example.com/db-design", + "author": "Alice DataEngineer", + "date": "2024-01-25", + "category": "Database", + "image_url": "https://example.com/images/database.jpg" + } +] \ No newline at end of file diff --git a/examples/tinysearch.toml b/examples/tinysearch.toml new file mode 100644 index 0000000..79c139c --- /dev/null +++ b/examples/tinysearch.toml @@ -0,0 +1,16 @@ +# tinysearch.toml - Configuration file for customizing search schema +# Place this file in the same directory as your index.json file + +[schema] +# Fields that should be indexed for full-text search +# These fields will be tokenized and searchable +indexed_fields = ["title", "body", "description", "tags"] + +# Fields that should be stored as metadata but not indexed +# These fields will be returned in search results but won't be searchable 
+# Note: The url_field is automatically included as metadata +metadata_fields = ["author", "date", "category", "image_url"] + +# Field that contains the URL/link for each document +# This field is always included in results +url_field = "url" \ No newline at end of file diff --git a/src/bin/tinysearch.rs b/src/bin/tinysearch.rs index 68b9f44..e7bc4cf 100644 --- a/src/bin/tinysearch.rs +++ b/src/bin/tinysearch.rs @@ -15,6 +15,7 @@ use std::process::{Command, Stdio}; use std::str::FromStr; use std::{env, fs}; use tempfile::TempDir; +use tinysearch::SearchSchema; use toml_edit::{DocumentMut, value}; use index::Posts; @@ -208,13 +209,22 @@ impl Stage for Search { struct Storage { posts_index: PathBuf, out_path: PathBuf, + schema: SearchSchema, } impl Stage for Storage { fn from_opt(opt: &Opt) -> Result { + let posts_index = opt.input_file.clone().context("No input file")?; + let parent_dir = posts_index + .parent() + .unwrap_or_else(|| std::path::Path::new(".")); + let schema = SearchSchema::load_from_file(parent_dir) + .map_err(|e| anyhow::anyhow!("Failed to load schema: {}", e))?; + Ok(Self { - posts_index: opt.input_file.clone().context("No input file")?, + posts_index, out_path: ensure_exists(opt.out_path.clone())?, + schema, }) } @@ -225,13 +235,15 @@ impl Stage for Storage { self.posts_index.display(), storage_file.display() ); - let posts: Posts = index::read( - fs::read_to_string(&self.posts_index) - .with_context(|| format!("Failed to read file {}", self.posts_index.display()))?, - ) - .with_context(|| format!("Failed to decode {}", self.posts_index.display()))?; + + let raw_content = fs::read_to_string(&self.posts_index) + .with_context(|| format!("Failed to read file {}", self.posts_index.display()))?; + + let posts: Posts = index::read(raw_content) + .with_context(|| format!("Failed to decode {}", self.posts_index.display()))?; trace!("Generating storage from posts: {:#?}", posts); - storage::write(posts, &storage_file)?; + storage::write(posts, 
&storage_file, &self.schema)?; + println!("Storage ready in file {}", storage_file.display()); Ok(()) } diff --git a/src/bin/utils/index.rs b/src/bin/utils/index.rs index 301f9f1..2440b8f 100644 --- a/src/bin/utils/index.rs +++ b/src/bin/utils/index.rs @@ -1,11 +1,10 @@ use serde::{Deserialize, Serialize}; +use std::collections::HashMap; #[derive(Debug, Serialize, Deserialize)] pub struct Post { - pub title: String, - pub url: String, - pub meta: Option, - pub body: Option, + #[serde(flatten)] + pub fields: HashMap, } pub type Posts = Vec; diff --git a/src/bin/utils/storage.rs b/src/bin/utils/storage.rs index de92182..3695b04 100644 --- a/src/bin/utils/storage.rs +++ b/src/bin/utils/storage.rs @@ -6,11 +6,11 @@ use std::path; use super::assets::STOP_WORDS; use super::index::Posts; use strip_markdown::strip_markdown; -use tinysearch::{Filters, PostId, Storage}; +use tinysearch::{Filters, PostId, SearchSchema, Storage}; use xorf::HashProxy; -pub fn write(posts: Posts, path: &path::PathBuf) -> Result<(), Error> { - let filters = build(posts)?; +pub fn write(posts: Posts, path: &path::PathBuf, schema: &SearchSchema) -> Result<(), Error> { + let filters = build(posts, schema)?; trace!("Storage::from"); let storage = Storage::from(filters); trace!("Write"); @@ -19,8 +19,8 @@ pub fn write(posts: Posts, path: &path::PathBuf) -> Result<(), Error> { Ok(()) } -fn build(posts: Posts) -> Result { - let posts = prepare_posts(posts); +fn build(posts: Posts, schema: &SearchSchema) -> Result { + let posts = prepare_posts(posts, schema); generate_filters(posts) } @@ -78,15 +78,108 @@ pub fn generate_filters(posts: HashMap>) -> Result HashMap> { +// prepares posts with arbitrary field mappings based on schema +pub fn prepare_posts(posts: Posts, schema: &SearchSchema) -> HashMap> { posts .into_iter() - .inspect(|post| debug!("Analyzing {}", post.url)) - .map(|post| ((post.title, post.url, post.meta), post.body)) + .inspect(|post| { + if let Some(url) = 
post.fields.get(&schema.url_field) { + debug!("Analyzing {}", extract_string_value(url)); + } + }) + .map(|post| { + let mut indexed_content = String::new(); + let mut metadata_content = String::new(); + + // Handle indexed fields + for field in &schema.indexed_fields { + if let Some(value) = post.fields.get(field) { + let field_content = extract_string_value(value); + if !field_content.is_empty() { + indexed_content.push_str(&field_content); + indexed_content.push(' '); + } + } else { + debug!("Field '{}' not found in post for indexing", field); + } + } + + // Handle metadata fields + for field in &schema.metadata_fields { + if let Some(value) = post.fields.get(field) { + let field_content = extract_string_value(value); + if !field_content.is_empty() { + metadata_content.push_str(&field_content); + metadata_content.push(' '); + } + } else { + debug!("Field '{}' not found in post for metadata", field); + } + } + + // Handle URL field + let url_value = if let Some(value) = post.fields.get(&schema.url_field) { + extract_string_value(value) + } else { + debug!( + "URL field '{}' not found in post, using empty string", + schema.url_field + ); + String::new() + }; + + // Extract title for PostId - use first indexed field as title or URL field as fallback + let title = if let Some(title_field) = schema.indexed_fields.first() { + if let Some(value) = post.fields.get(title_field) { + extract_string_value(value) + } else { + url_value.clone() + } + } else { + url_value.clone() + }; + + // Create PostId with title, URL, and metadata + let post_id = ( + title, + url_value, + if metadata_content.trim().is_empty() { + None + } else { + Some(metadata_content.trim().to_string()) + }, + ); + + ( + post_id, + if indexed_content.trim().is_empty() { + None + } else { + Some(indexed_content.trim().to_string()) + }, + ) + }) .collect() } +// Helper function to extract string value from JSON value +fn extract_string_value(value: &serde_json::Value) -> String { + match value { + 
serde_json::Value::String(s) => s.clone(), + serde_json::Value::Number(n) => n.to_string(), + serde_json::Value::Bool(b) => b.to_string(), + serde_json::Value::Array(arr) => arr + .iter() + .filter_map(|v| match v { + serde_json::Value::String(s) => Some(s.as_str()), + _ => None, + }) + .collect::>() + .join(" "), + _ => String::new(), + } +} + #[cfg(test)] mod tests { use xorf::Filter; @@ -124,4 +217,120 @@ mod tests { assert!(filter.contains(&"kubernetes".to_owned())); assert!(filter.contains(&"excel".to_owned())); } + + #[test] + fn test_prepare_posts_with_schema() { + use super::super::index::Post; + use std::collections::HashMap; + + let mut post_fields = HashMap::new(); + post_fields.insert( + "title".to_string(), + serde_json::Value::String("Test Title".to_string()), + ); + post_fields.insert( + "url".to_string(), + serde_json::Value::String("https://example.com".to_string()), + ); + post_fields.insert( + "body".to_string(), + serde_json::Value::String("Test body content".to_string()), + ); + + let posts = vec![Post { + fields: post_fields, + }]; + + let schema = SearchSchema::default(); + let prepared = prepare_posts(posts, &schema); + + assert_eq!(prepared.len(), 1); + let (post_id, body) = prepared.iter().next().unwrap(); + + assert_eq!(post_id.0, "Test Title"); + assert_eq!(post_id.1, "https://example.com"); + assert!(body.is_some()); + assert!(body.as_ref().unwrap().contains("Test Title")); + assert!(body.as_ref().unwrap().contains("Test body content")); + } + + #[test] + fn test_prepare_posts_custom_fields() { + use super::super::index::Post; + use std::collections::HashMap; + + let mut post_fields = HashMap::new(); + post_fields.insert( + "product_name".to_string(), + serde_json::Value::String("Gaming Laptop".to_string()), + ); + post_fields.insert( + "description".to_string(), + serde_json::Value::String("High-performance gaming laptop".to_string()), + ); + post_fields.insert( + "product_url".to_string(), + 
serde_json::Value::String("https://example.com/laptop".to_string()), + ); + post_fields.insert( + "price".to_string(), + serde_json::Value::String("$1999.99".to_string()), + ); + post_fields.insert( + "brand".to_string(), + serde_json::Value::String("TechCorp".to_string()), + ); + + let posts = vec![Post { + fields: post_fields, + }]; + + let schema = SearchSchema { + indexed_fields: vec!["product_name".to_string(), "description".to_string()], + metadata_fields: vec!["price".to_string(), "brand".to_string()], + url_field: "product_url".to_string(), + }; + + let prepared = prepare_posts(posts, &schema); + + assert_eq!(prepared.len(), 1); + let (post_id, indexed_content) = prepared.iter().next().unwrap(); + + // Check PostId structure + assert_eq!(post_id.0, "Gaming Laptop"); // Title should be first indexed field + assert_eq!(post_id.1, "https://example.com/laptop"); // URL from product_url field + assert!(post_id.2.is_some()); // Should have metadata + let metadata = post_id.2.as_ref().unwrap(); + assert!(metadata.contains("$1999.99")); + assert!(metadata.contains("TechCorp")); + + // Check indexed content + assert!(indexed_content.is_some()); + let content = indexed_content.as_ref().unwrap(); + assert!(content.contains("Gaming Laptop")); + assert!(content.contains("High-performance gaming laptop")); + } + + #[test] + fn test_extract_string_value() { + use serde_json::Value; + + assert_eq!( + extract_string_value(&Value::String("test".to_string())), + "test" + ); + assert_eq!( + extract_string_value(&Value::Number(serde_json::Number::from(42))), + "42" + ); + assert_eq!(extract_string_value(&Value::Bool(true)), "true"); + + let array = Value::Array(vec![ + Value::String("hello".to_string()), + Value::String("world".to_string()), + ]); + assert_eq!(extract_string_value(&array), "hello world"); + + assert_eq!(extract_string_value(&Value::Null), ""); + } } diff --git a/src/lib.rs b/src/lib.rs index 43847fb..a541fd7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 
+10,9 @@ use std::collections::hash_map::DefaultHasher; use std::convert::From; use xorf::{Filter as XorfFilter, HashProxy, Xor8}; +#[cfg(feature = "bin")] +use std::path::Path; + /// Title of a post type Title = String; /// URL of a post @@ -26,6 +29,98 @@ pub type PostFilter = (PostId, HashProxy); /// Collection of all post filters pub type Filters = Vec; +/// Configuration schema for tinysearch.toml +#[cfg(feature = "bin")] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchSchemaConfig { + /// Schema configuration section + pub schema: SearchSchema, +} + +/// Schema configuration details +#[cfg(feature = "bin")] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchSchema { + /// Fields that should be indexed for searching + pub indexed_fields: Vec, + /// Fields that should be stored as metadata but not indexed + pub metadata_fields: Vec, + /// Field that contains the URL for each document + pub url_field: String, +} + +#[cfg(feature = "bin")] +impl Default for SearchSchema { + /// Default schema configuration matching current JSON structure + fn default() -> Self { + SearchSchema { + indexed_fields: vec!["title".to_string(), "body".to_string()], + metadata_fields: vec![], + url_field: "url".to_string(), + } + } +} + +#[cfg(feature = "bin")] +impl SearchSchema { + /// Load schema from tinysearch.toml file, falling back to defaults if not found + pub fn load_from_file>(path: P) -> Result { + let toml_path = path.as_ref().join("tinysearch.toml"); + + if !toml_path.exists() { + return Ok(SearchSchema::default()); + } + + let toml_content = std::fs::read_to_string(&toml_path) + .map_err(|e| format!("Failed to read tinysearch.toml: {e}"))?; + let config: SearchSchemaConfig = toml::from_str(&toml_content) + .map_err(|e| format!("Failed to parse tinysearch.toml: {e}"))?; + + // Validate schema + config.schema.validate()?; + + Ok(config.schema) + } + + /// Validate the schema configuration + pub fn validate(&self) -> Result<(), String> { + 
if self.indexed_fields.is_empty() { + return Err("indexed_fields cannot be empty".to_string()); + } + + if self.url_field.is_empty() { + return Err("url_field cannot be empty".to_string()); + } + + // Check for overlap between indexed and metadata fields + let all_fields: Vec<_> = self + .indexed_fields + .iter() + .chain(self.metadata_fields.iter()) + .chain(std::iter::once(&self.url_field)) + .collect(); + + let mut unique_fields = std::collections::HashSet::new(); + for field in &all_fields { + if !unique_fields.insert(field) { + return Err(format!("Duplicate field definition: {field}")); + } + } + + Ok(()) + } + + /// Get all fields that should be processed from JSON (indexed + metadata + url) + pub fn all_fields(&self) -> Vec { + let mut fields = self.indexed_fields.clone(); + fields.extend(self.metadata_fields.clone()); + if !fields.contains(&self.url_field) { + fields.push(self.url_field.clone()); + } + fields + } +} + /// Storage container for serialized search index #[derive(Serialize, Deserialize)] pub struct Storage { @@ -114,3 +209,115 @@ pub fn search(filters: &'_ Filters, query: String, num_results: usize) -> Vec<&' matches.into_iter().take(num_results).map(|p| p.0).collect() } + +#[cfg(test)] +#[cfg(feature = "bin")] +mod schema_tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_default_schema() { + let schema = SearchSchema::default(); + assert_eq!(schema.indexed_fields, vec!["title", "body"]); + assert_eq!(schema.metadata_fields, Vec::::new()); + assert_eq!(schema.url_field, "url"); + if let Err(e) = schema.validate() { + panic!("Default schema validation failed: {}", e); + } + } + + #[test] + fn test_load_nonexistent_file() { + let temp_dir = TempDir::new().unwrap(); + let schema = SearchSchema::load_from_file(temp_dir.path()).unwrap(); + assert_eq!(schema.indexed_fields, vec!["title", "body"]); + } + + #[test] + fn test_load_valid_toml() { + let temp_dir = TempDir::new().unwrap(); + let toml_content = r#" +[schema] 
+indexed_fields = ["title", "description"] +metadata_fields = ["author", "date", "image_url"] +url_field = "permalink" +"#; + std::fs::write(temp_dir.path().join("tinysearch.toml"), toml_content).unwrap(); + + let schema = SearchSchema::load_from_file(temp_dir.path()).unwrap(); + assert_eq!(schema.indexed_fields, vec!["title", "description"]); + assert_eq!(schema.metadata_fields, vec!["author", "date", "image_url"]); + assert_eq!(schema.url_field, "permalink"); + } + + #[test] + fn test_validation_empty_indexed_fields() { + let schema = SearchSchema { + indexed_fields: vec![], + metadata_fields: vec!["url".to_string()], + url_field: "url".to_string(), + }; + assert!(schema.validate().is_err()); + } + + #[test] + fn test_validation_empty_url_field() { + let schema = SearchSchema { + indexed_fields: vec!["title".to_string()], + metadata_fields: vec![], + url_field: String::new(), + }; + assert!(schema.validate().is_err()); + } + + #[test] + fn test_validation_duplicate_fields() { + let schema = SearchSchema { + indexed_fields: vec!["title".to_string(), "body".to_string()], + metadata_fields: vec!["title".to_string()], // Duplicate! 
+ url_field: "url".to_string(), + }; + assert!(schema.validate().is_err()); + } + + #[test] + fn test_all_fields_method() { + let schema = SearchSchema { + indexed_fields: vec!["title".to_string(), "body".to_string()], + metadata_fields: vec!["author".to_string(), "date".to_string()], + url_field: "permalink".to_string(), + }; + + let all_fields = schema.all_fields(); + assert!(all_fields.contains(&"title".to_string())); + assert!(all_fields.contains(&"body".to_string())); + assert!(all_fields.contains(&"author".to_string())); + assert!(all_fields.contains(&"date".to_string())); + assert!(all_fields.contains(&"permalink".to_string())); + } + + #[test] + fn test_invalid_toml_format() { + let temp_dir = TempDir::new().unwrap(); + let invalid_toml = "this is not valid toml ["; + std::fs::write(temp_dir.path().join("tinysearch.toml"), invalid_toml).unwrap(); + + let result = SearchSchema::load_from_file(temp_dir.path()); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Failed to parse")); + } + + #[test] + fn test_missing_schema_section() { + let temp_dir = TempDir::new().unwrap(); + let toml_content = r#" +[other] +value = "test" +"#; + std::fs::write(temp_dir.path().join("tinysearch.toml"), toml_content).unwrap(); + + let result = SearchSchema::load_from_file(temp_dir.path()); + assert!(result.is_err()); + } +} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 1247825..b5b0ff0 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -30,6 +30,7 @@ fn test_cli_wasm_mode() { let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let current_dir = std::env::current_dir().unwrap(); let output = Command::new("cargo") .args([ "run", @@ -39,6 +40,11 @@ fn test_cli_wasm_mode() { "wasm", "-p", temp_dir.path().to_str().unwrap(), + "--engine-version", + &format!( + "path=\"{current_dir}\"", + current_dir = current_dir.display() + ), "fixtures/index.json", ]) .output() @@ -109,3 +115,205 @@ fn 
test_cli_storage_mode() { let storage_path = temp_dir.path().join("storage"); assert!(storage_path.exists(), "Storage file should be created"); } + +#[test] +fn test_tinysearch_toml_configuration() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + + // Create a custom tinysearch.toml + let toml_content = r#" +[schema] +indexed_fields = ["title", "description", "tags"] +metadata_fields = ["author", "date", "category"] +url_field = "permalink" +"#; + std::fs::write(temp_dir.path().join("tinysearch.toml"), toml_content) + .expect("Failed to write tinysearch.toml"); + + // Create a custom JSON file with the schema fields + let json_content = r#" +[ + { + "title": "Custom Post Title", + "description": "This is a custom description", + "tags": "rust webassembly search", + "permalink": "https://example.com/custom", + "author": "Test Author", + "date": "2024-01-15", + "category": "Technology" + }, + { + "title": "Another Post", + "description": "Different content here", + "tags": "javascript frontend", + "permalink": "https://example.com/another", + "author": "Another Author", + "date": "2024-01-20", + "category": "Development" + } +] +"#; + let json_path = temp_dir.path().join("custom_index.json"); + std::fs::write(&json_path, json_content).expect("Failed to write custom JSON file"); + + let output = Command::new("cargo") + .args([ + "run", + "--features=bin", + "--", + "-m", + "storage", + "-p", + temp_dir.path().to_str().unwrap(), + json_path.to_str().unwrap(), + ]) + .output() + .expect("Failed to execute command"); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + eprintln!("Custom schema build failed. 
Stdout: {}", stdout); + eprintln!("Stderr: {}", stderr); + panic!("Custom schema build failed unexpectedly"); + } + + // Check that storage file was created + let storage_path = temp_dir.path().join("storage"); + assert!( + storage_path.exists(), + "Storage file should be created with custom schema" + ); + + // Test search functionality with the custom schema + let search_output = Command::new("cargo") + .args([ + "run", + "--features=bin", + "--", + "-m", + "search", + "-S", + "rust", + "-N", + "5", + storage_path.to_str().unwrap(), + ]) + .output() + .expect("Failed to execute search command"); + + assert!( + search_output.status.success(), + "Search should work with custom schema" + ); + + let search_stdout = String::from_utf8_lossy(&search_output.stdout); + assert!( + search_stdout.contains("Custom Post Title"), + "Should find the custom post" + ); + assert!( + search_stdout.contains("https://example.com/custom"), + "Should contain the custom URL from permalink field" + ); +} + +#[test] +fn test_flexible_json_fields() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + + // Create a tinysearch.toml with non-standard fields + let toml_content = r#" +[schema] +indexed_fields = ["product_name", "product_description"] +metadata_fields = ["price", "brand", "availability"] +url_field = "product_url" +"#; + std::fs::write(temp_dir.path().join("tinysearch.toml"), toml_content) + .expect("Failed to write tinysearch.toml"); + + // Create JSON with e-commerce-like fields + let json_content = r#" +[ + { + "product_name": "Wireless Headphones", + "product_description": "High-quality wireless headphones with active noise cancellation", + "product_url": "https://store.example.com/headphones", + "price": "$299.99", + "brand": "AudioTech", + "availability": "In Stock" + }, + { + "product_name": "Bluetooth Speaker", + "product_description": "Portable waterproof speaker with excellent sound quality", + "product_url": "https://store.example.com/speaker", 
+ "price": "$149.99", + "brand": "SoundWave", + "availability": "Limited Stock" + } +] +"#; + let json_path = temp_dir.path().join("products.json"); + std::fs::write(&json_path, json_content).expect("Failed to write products JSON file"); + + let output = Command::new("cargo") + .args([ + "run", + "--features=bin", + "--", + "-m", + "storage", + "-p", + temp_dir.path().to_str().unwrap(), + json_path.to_str().unwrap(), + ]) + .output() + .expect("Failed to execute command"); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + eprintln!("Flexible fields build failed. Stdout: {}", stdout); + eprintln!("Stderr: {}", stderr); + panic!("Flexible fields build failed unexpectedly"); + } + + // Verify storage was created + let storage_path = temp_dir.path().join("storage"); + assert!( + storage_path.exists(), + "Storage file should be created with flexible fields" + ); + + // Test search works with the custom product fields + let search_output = Command::new("cargo") + .args([ + "run", + "--features=bin", + "--", + "-m", + "search", + "-S", + "wireless", + "-N", + "1", + storage_path.to_str().unwrap(), + ]) + .output() + .expect("Failed to execute search command"); + + assert!( + search_output.status.success(), + "Search should work with flexible product fields" + ); + + let search_stdout = String::from_utf8_lossy(&search_output.stdout); + assert!( + search_stdout.contains("Wireless Headphones"), + "Should find the wireless headphones product" + ); + assert!( + search_stdout.contains("https://store.example.com/headphones"), + "Should contain the product URL" + ); +} diff --git a/tinysearch.toml b/tinysearch.toml new file mode 100644 index 0000000..86c33be --- /dev/null +++ b/tinysearch.toml @@ -0,0 +1,10 @@ +[schema] +# Fields that should be indexed for searching +indexed_fields = ["title", "body"] + +# Fields that should be stored as metadata but not indexed +# Note: url_field is 
automatically included as metadata, don't duplicate it here +metadata_fields = [] + +# Field that contains the URL for each document +url_field = "url" \ No newline at end of file From f6644f392867590897ce0fb87be987ee2e4aedff Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Mon, 1 Sep 2025 13:48:24 +0200 Subject: [PATCH 57/58] Add support for using tinysearch as a library from Rust (#184) * Refactor library API into dedicated module with comprehensive documentation This refactoring introduces a clean, well-documented public API while maintaining backward compatibility with existing CLI functionality: - Created api module with Post trait and BasicPost implementation for flexible post types - Added TinySearch struct with builder pattern for configuration and search operations - Moved public API types to dedicated module with comprehensive rustdoc documentation - Added complete library usage examples for basic and advanced use cases - Created detailed LIBRARY_USAGE.md documentation with migration guide - Updated dependencies to support library functionality - Maintained full CLI compatibility and all existing functionality * Move library documentation into README and mark as experimental - Added concise library usage section to README - Marked library API as experimental with API stability warning - Removed separate LIBRARY_USAGE.md file - Referenced example directories for detailed usage * Refactor metadata handling in posts to use vectors instead of options for better indexing and search capabilities * Fix storage unit test for HashMap metadata compatibility - Updated test_generate_filters to use String::new() instead of None for metadata - This fixes CI compilation error where PostId expects String but test provided Option - All tests now pass with the new HashMap metadata format * Filter -> SearchIndex * Fix lints * Temporarily disable clippy lint checks in CI workflow due to noise --- .github/workflows/ci.yml | 5 +- Cargo.toml | 47 ++- Makefile | 4 +- 
README.md | 34 ++ assets/crate/src/lib.rs | 24 +- examples/library_advanced/main.rs | 100 ++++++ examples/library_basic/main.rs | 114 +++++++ examples/search_index_type.rs | 67 ++++ src/api.rs | 522 ++++++++++++++++++++++++++++++ src/bin/tinysearch.rs | 44 +-- src/bin/utils/storage.rs | 82 +++-- src/lib.rs | 128 ++++++-- tests/integration_test.rs | 33 +- 13 files changed, 1087 insertions(+), 117 deletions(-) create mode 100644 examples/library_advanced/main.rs create mode 100644 examples/library_basic/main.rs create mode 100644 examples/search_index_type.rs create mode 100644 src/api.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebe5687..e7f128e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,8 +46,9 @@ jobs: - name: Run cargo check run: cargo check --all-targets --all-features - - name: Run clippy - run: cargo clippy --all-targets --all-features -- -D warnings + # Ignore clippy lints for now, too noisy + # - name: Run clippy + # run: cargo clippy --all-targets --all-features -- -D warnings - name: Run tests run: cargo test --all-features diff --git a/Cargo.toml b/Cargo.toml index 793fa25..e9aab7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,11 +24,11 @@ bincode = "1.3.3" argh = { version = "0.1.12", optional = true } log = { version = "0.4.22", optional = true } -serde_json = { version = "1.0.132", optional = true } +serde_json = "1.0.132" anyhow = { version = "1.0.93", optional = true } tempfile = { version = "3.14.0", optional = true } -strip_markdown = { version = "0.2.0", optional = true } +strip_markdown = "0.2.0" strum = { version = "0.26.3", features = ["derive"], optional = true } toml = { version = "0.8.19", optional = true } toml_edit = { version = "0.22.22", optional = true } @@ -49,10 +49,8 @@ default = [] bin = [ "argh", "log", - "serde_json", "anyhow", "tempfile", - "strip_markdown", "strum", "toml", "toml_edit", @@ -60,3 +58,44 @@ bin = [ [dev-dependencies] tempfile = "3.14.0" + 
+[lints.clippy] +# Pedantic lints that help with code quality (lower priority so allows can override) +pedantic = { level = "warn", priority = -1 } +nursery = { level = "warn", priority = -1 } + +# Restriction lints for better code practices +dbg_macro = "warn" +todo = "warn" +unimplemented = "warn" +unwrap_used = "warn" +expect_used = "warn" +panic = "warn" +unreachable = "warn" +missing_docs_in_private_items = "warn" +print_stdout = "warn" +print_stderr = "warn" +use_debug = "warn" + +# Performance lints +inefficient_to_string = "warn" +large_enum_variant = "warn" +trivially_copy_pass_by_ref = "warn" + +# Correctness lints (use new name) +indexing_slicing = "warn" +arithmetic_side_effects = "warn" +float_arithmetic = "warn" +as_conversions = "warn" + +# Style lints +string_add = "warn" +string_add_assign = "warn" +single_char_lifetime_names = "warn" +explicit_deref_methods = "warn" + +# Allow some pedantic lints that are too noisy +module_name_repetitions = "allow" +must_use_candidate = "allow" +missing_errors_doc = "allow" +missing_panics_doc = "allow" diff --git a/Makefile b/Makefile index 943b9b9..a508d2e 100644 --- a/Makefile +++ b/Makefile @@ -77,12 +77,12 @@ release: ## Run tinysearch release build example: check-wasm-target ## Generate WASM output with sample data mkdir -p wasm_output - cargo run --features=bin -- -m wasm -p wasm_output fixtures/index.json + cargo run --features=bin -- -m wasm -p wasm_output -e 'path="$(PWD)"' fixtures/index.json demo: check-wasm-target ## Run interactive demo (generates WASM and starts server) @echo "🚀 Building TinySearch and generating WASM demo..." 
@mkdir -p demo - @cargo run --features=bin -- -m wasm -p demo fixtures/index.json + @cargo run --features=bin -- -m wasm -p demo -e 'path="$(PWD)"' fixtures/index.json @mv demo/demo.html demo/index.html @echo "🌐 Starting demo server at http://localhost:8000/demo/" @echo " Press Ctrl+C to stop the server" diff --git a/README.md b/README.md index 6eba841..e0d3798 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,40 @@ metadata_fields = ["version", "last_updated", "contributor"] url_field = "doc_url" ``` +## Library Usage (Experimental) + +tinysearch can be used as a Rust library for programmatic search index generation and searching. This feature is experimental and the API may change. + +Add tinysearch to your `Cargo.toml`: + +```sh +cargo add tinysearch +``` + +Basic usage with the provided `BasicPost` struct: + +```rust +use tinysearch::{BasicPost, TinySearch}; +use std::collections::HashMap; + +let posts = vec![ + BasicPost { + title: "My Post".to_string(), + url: "/my-post".to_string(), + body: Some("Post content here".to_string()), + meta: HashMap::new(), + } +]; + +let search = TinySearch::new(); +let index = search.build_index(&posts)?; +let results = search.search(&index, "content", 10); +``` + +For advanced usage including custom post types and configuration, see: +- [Basic library example](examples/library_basic/) +- [Advanced library example](examples/library_advanced/) + ## Advanced Usage For advanced usage options, run diff --git a/assets/crate/src/lib.rs b/assets/crate/src/lib.rs index 2fe72c6..e78d356 100644 --- a/assets/crate/src/lib.rs +++ b/assets/crate/src/lib.rs @@ -1,17 +1,17 @@ -use std::sync::OnceLock; use std::ffi::{CStr, CString}; use std::os::raw::c_char; +use std::sync::OnceLock; -use tinysearch::{search as base_search, Filters, PostId, Storage}; +use tinysearch::{PostId, SearchIndex, Storage, search as base_search}; -static FILTERS: OnceLock = OnceLock::new(); +static SEARCH_INDEX: OnceLock = OnceLock::new(); pub fn 
search_local(query: String, num_results: usize) -> Vec<&'static PostId> { - let filters = FILTERS.get_or_init(|| { + let index = SEARCH_INDEX.get_or_init(|| { let bytes = include_bytes!("storage"); Storage::from_bytes(bytes).unwrap().filters }); - base_search(filters, query, num_results) + base_search(index, &query, num_results) } /// Export for WASM - search function that takes C strings and returns JSON @@ -28,15 +28,17 @@ pub extern "C" fn search(query_ptr: *const c_char, num_results: usize) -> *mut c }; let results = search_local(query, num_results); - + // Convert results to a simple JSON format let json_results: Vec = results .into_iter() - .map(|post_id| serde_json::json!({ - "title": post_id.0, - "url": post_id.1, - "meta": post_id.2 - })) + .map(|post_id| { + serde_json::json!({ + "title": post_id.title, + "url": post_id.url, + "meta": post_id.meta + }) + }) .collect(); let json_string = match serde_json::to_string(&json_results) { diff --git a/examples/library_advanced/main.rs b/examples/library_advanced/main.rs new file mode 100644 index 0000000..f04a2fa --- /dev/null +++ b/examples/library_advanced/main.rs @@ -0,0 +1,100 @@ +//! 
Advanced library usage example showing custom Post trait implementation + +#![allow(clippy::print_stdout, clippy::missing_docs_in_private_items)] + +use std::collections::HashMap; +use tinysearch::{Post, TinySearch}; + +/// Example of implementing the Post trait on your own custom type +#[derive(Debug)] +struct BlogPost { + title: String, + slug: String, + content: String, + tags: Vec, + author: String, +} + +impl Post for BlogPost { + fn title(&self) -> &str { + &self.title + } + + fn url(&self) -> &str { + &self.slug + } + + fn body(&self) -> Option<&str> { + Some(&self.content) + } + + fn meta(&self) -> HashMap { + // Include author and tags in searchable metadata + let mut meta = HashMap::new(); + meta.insert("author".to_string(), self.author.clone()); + meta.insert("tags".to_string(), self.tags.join(", ")); + meta + } +} + +fn main() -> Result<(), Box> { + println!("Custom Post trait implementation example\n"); + + // Create blog posts using your own struct + let blog_posts = vec![ + BlogPost { + title: "Getting Started with Rust".to_string(), + slug: "/blog/rust-getting-started".to_string(), + content: "Rust is a systems programming language focused on safety and performance" + .to_string(), + tags: vec!["rust".to_string(), "programming".to_string()], + author: "Alice".to_string(), + }, + BlogPost { + title: "WebAssembly Performance Tips".to_string(), + slug: "/blog/wasm-performance".to_string(), + content: "Optimizing WebAssembly modules for better performance in browsers" + .to_string(), + tags: vec!["wasm".to_string(), "performance".to_string()], + author: "Bob".to_string(), + }, + BlogPost { + title: "Building Search Engines".to_string(), + slug: "/blog/search-engines".to_string(), + content: "How to build efficient search engines using modern techniques".to_string(), + tags: vec!["search".to_string(), "algorithms".to_string()], + author: "Alice".to_string(), + }, + ]; + + // Create search engine with custom stopwords + let search = 
TinySearch::new().with_stopwords(vec!["the".to_string(), "with".to_string()]); + + // Build index from custom post types + println!("Building index from {} blog posts...", blog_posts.len()); + let index = search.build_index(&blog_posts)?; + println!("Index built with {} filters\n", index.len()); + + // Search examples + let queries = vec!["rust", "alice", "performance", "wasm"]; + + for query in queries { + println!("Searching for: '{query}'"); + let results = search.search(&index, query, 3); + + if results.is_empty() { + println!(" No results found"); + } else { + for result in results { + println!(" - {} ({})", result.title, result.url); + if !result.meta.is_empty() { + println!(" Meta: {}", result.meta); + } + } + } + println!(); + } + + println!("Custom Post implementation example completed!"); + Ok(()) +} diff --git a/examples/library_basic/main.rs b/examples/library_basic/main.rs new file mode 100644 index 0000000..b1483b2 --- /dev/null +++ b/examples/library_basic/main.rs @@ -0,0 +1,114 @@ +//! 
Basic library usage example demonstrating search functionality + +#![allow(clippy::print_stdout, clippy::missing_docs_in_private_items)] + +use std::collections::HashMap; +use tinysearch::{BasicPost, TinySearch}; + +fn main() -> Result<(), Box> { + println!("Testing tinysearch library API...\n"); + + // Example 1: Create posts manually using BasicPost + let posts = vec![ + BasicPost { + title: "Introduction to Rust".to_string(), + url: "/rust-intro".to_string(), + body: Some( + "Rust is a systems programming language that is fast, safe, and concurrent" + .to_string(), + ), + meta: HashMap::new(), + }, + BasicPost { + title: "WebAssembly Tutorial".to_string(), + url: "/wasm-tutorial".to_string(), + body: Some( + "WebAssembly (WASM) allows you to run code at near-native speed in web browsers" + .to_string(), + ), + meta: HashMap::new(), + }, + BasicPost { + title: "Building Search Engines".to_string(), + url: "/search-engines".to_string(), + body: Some( + "Search engines use various algorithms to index and retrieve relevant documents" + .to_string(), + ), + meta: HashMap::new(), + }, + ]; + + // Create a TinySearch instance + let tinysearch = TinySearch::new(); + + // Build search index + println!("Building search index from {} posts...", posts.len()); + let index = tinysearch.build_index(&posts)?; + println!("Index built successfully with {} filters\n", index.len()); + + // Search the index + let queries = vec!["rust", "wasm", "search", "algorithms"]; + for query in queries { + println!("Searching for: '{query}'"); + let results = tinysearch.search(&index, query, 5); + for result in results { + println!(" - {}: {}", result.title, result.url); + } + println!(); + } + + // Example 2: Parse from JSON + let json_data = r#"[ + { + "title": "JSON Parsing Example", + "url": "/json-example", + "body": "This post demonstrates JSON parsing functionality in tinysearch" + } + ]"#; + + println!("Testing JSON parsing..."); + let json_posts = 
tinysearch.parse_posts_from_json(json_data)?; + println!("Parsed {} posts from JSON\n", json_posts.len()); + + // Example 3: Serialize and deserialize index + println!("Testing serialization..."); + let serialized = tinysearch.build_and_serialize_index(&json_posts)?; + println!("Serialized index size: {} bytes", serialized.len()); + + let deserialized_index = tinysearch.load_index_from_bytes(&serialized)?; + println!( + "Deserialized index with {} filters", + deserialized_index.len() + ); + + let search_results = tinysearch.search(&deserialized_index, "json", 5); + println!( + "Search results for 'json': {} matches", + search_results.len() + ); + + // Example 4: Using builder pattern with custom stopwords + println!("\nTesting builder pattern with custom stopwords..."); + let tinysearch = TinySearch::new().with_stopwords(vec!["the".to_string(), "is".to_string()]); + + let test_posts = vec![BasicPost { + title: "The Ultimate Guide".to_string(), + url: "/ultimate-guide".to_string(), + body: Some("This is the ultimate guide to everything".to_string()), + meta: HashMap::new(), + }]; + + let custom_stopwords_index = tinysearch.build_index(&test_posts)?; + let stopword_results = tinysearch.search(&custom_stopwords_index, "ultimate", 5); + println!( + "Results with custom stopwords: {} matches", + stopword_results.len() + ); + for result in stopword_results { + println!(" - {}: {}", result.title, result.url); + } + + println!("\nLibrary API test completed successfully!"); + Ok(()) +} diff --git a/examples/search_index_type.rs b/examples/search_index_type.rs new file mode 100644 index 0000000..3b7f1b5 --- /dev/null +++ b/examples/search_index_type.rs @@ -0,0 +1,67 @@ +//! Example showing how to use the `SearchIndex` type to store and work with +//! search indexes without needing to import the xorf library directly. 
+ +#![allow(clippy::print_stdout, clippy::missing_docs_in_private_items)] + +use std::collections::HashMap; +use tinysearch::{BasicPost, SearchIndex, TinySearch}; + +/// Example showing how to use the `SearchIndex` type to store and work with +/// search indexes without needing to import the xorf library directly. +fn main() -> Result<(), Box> { + println!("SearchIndex type example\n"); + + // Create some posts + let mut meta = HashMap::new(); + meta.insert("category".to_string(), "tutorial".to_string()); + + let posts = vec![ + BasicPost { + title: "Getting Started with Rust".to_string(), + url: "/rust-tutorial".to_string(), + body: Some("Learn Rust programming language basics".to_string()), + meta: meta.clone(), + }, + BasicPost { + title: "Advanced Rust Concepts".to_string(), + url: "/rust-advanced".to_string(), + body: Some("Deep dive into advanced Rust features".to_string()), + meta, + }, + ]; + + let search = TinySearch::new(); + + // Build index with explicit type annotation + let search_index: SearchIndex = search.build_index(&posts)?; + + println!("Built search index with {} entries", search_index.len()); + + // Store the index (could be in a struct field, etc.) 
+ let stored_index = search_index; + + // Use the stored index for searching + let results = search.search(&stored_index, "rust programming", 10); + + println!("Search results for 'rust programming':"); + for result in results { + println!(" - {}: {}", result.title, result.url); + if !result.meta.is_empty() { + println!(" Meta: {}", result.meta); + } + } + + // Demonstrate serialization/deserialization with SearchIndex + println!("\nTesting serialization..."); + let index_bytes = search.build_and_serialize_index(&posts)?; + let loaded_index: SearchIndex = search.load_index_from_bytes(&index_bytes)?; + + let results = search.search(&loaded_index, "advanced", 5); + println!("Results from deserialized index:"); + for result in results { + println!(" - {}: {}", result.title, result.url); + } + + println!("\nSearchIndex type example completed!"); + Ok(()) +} diff --git a/src/api.rs b/src/api.rs new file mode 100644 index 0000000..687740d --- /dev/null +++ b/src/api.rs @@ -0,0 +1,522 @@ +//! Public API for tinysearch library +//! +//! This module contains the main public API types and functions for using tinysearch +//! as a library. The API is designed around the [`Post`] trait and [`TinySearch`] struct +//! which provide flexible and ergonomic access to search index generation and querying. + +use bincode::Error as BincodeError; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::convert::From; +use strip_markdown::strip_markdown; +use xorf::{HashProxy, Xor8}; + +use crate::{PostId, SearchIndex, Storage}; + +/// Trait that types must implement to be used as posts in tinysearch +/// +/// This trait allows users to use their own post types without needing to convert +/// to a specific struct, as long as they can provide the required fields through +/// these methods. 
+/// +/// # Example +/// +/// ```rust +/// use tinysearch::Post; +/// use std::collections::HashMap; +/// +/// #[derive(Debug)] +/// struct BlogPost { +/// title: String, +/// permalink: String, +/// content: String, +/// author: String, +/// } +/// +/// impl Post for BlogPost { +/// fn title(&self) -> &str { +/// &self.title +/// } +/// +/// fn url(&self) -> &str { +/// &self.permalink +/// } +/// +/// fn body(&self) -> Option<&str> { +/// Some(&self.content) +/// } +/// +/// fn meta(&self) -> HashMap { +/// let mut meta = HashMap::new(); +/// meta.insert("author".to_string(), self.author.clone()); +/// meta +/// } +/// } +/// ``` +pub trait Post { + /// Get the post title + /// + /// The title is used both for display in search results and as part of the + /// searchable content. Title matches are weighted higher than body matches. + fn title(&self) -> &str; + + /// Get the post URL or identifier + /// + /// This should be a unique identifier for the post, typically a URL path + /// or permalink that can be used to navigate to the post. + fn url(&self) -> &str; + + /// Get the post body content, if any + /// + /// The body content is tokenized and indexed for full-text search. + /// Return `None` if the post has no body content (e.g., for title-only posts). + fn body(&self) -> Option<&str>; + + /// Get metadata for the post as key-value pairs + /// + /// Metadata is also indexed and searchable, useful for things like author names, + /// tags, categories, or other structured data you want to be findable. + /// Return an empty `HashMap` if no metadata should be indexed. + fn meta(&self) -> HashMap; +} + +/// Basic implementation of the [`Post`] trait for simple use cases +/// +/// This struct provides a straightforward way to create posts without needing +/// to implement the [`Post`] trait yourself. All fields are public for easy construction. 
+/// +/// # Example +/// +/// ```rust +/// use tinysearch::BasicPost; +/// use std::collections::HashMap; +/// +/// let mut meta = HashMap::new(); +/// meta.insert("category".to_string(), "programming".to_string()); +/// meta.insert("author".to_string(), "John Doe".to_string()); +/// +/// let post = BasicPost { +/// title: "My First Post".to_string(), +/// url: "/posts/my-first-post".to_string(), +/// body: Some("This is the content of my post".to_string()), +/// meta, +/// }; +/// ``` +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BasicPost { + /// Post title + pub title: String, + /// Post URL or permalink + pub url: String, + /// Optional post body content + pub body: Option, + /// Metadata as key-value pairs (e.g., author, category, tags) + #[serde(default)] + pub meta: HashMap, +} + +impl Post for BasicPost { + fn title(&self) -> &str { + &self.title + } + + fn url(&self) -> &str { + &self.url + } + + fn body(&self) -> Option<&str> { + self.body.as_deref() + } + + fn meta(&self) -> HashMap { + self.meta.clone() + } +} + +/// Main API struct for tinysearch operations +/// +/// This struct provides the primary interface for building search indexes and +/// performing searches. It supports a builder pattern for configuration and +/// provides methods for common operations like JSON parsing and serialization. 
+/// +/// # Example +/// +/// ```rust +/// use tinysearch::{BasicPost, TinySearch}; +/// use std::collections::HashMap; +/// +/// // Create posts +/// let posts = vec![ +/// BasicPost { +/// title: "First Post".to_string(), +/// url: "/first".to_string(), +/// body: Some("Content about Rust programming".to_string()), +/// meta: HashMap::new(), +/// } +/// ]; +/// +/// // Build and search index +/// let search = TinySearch::new(); +/// let index = search.build_index(&posts).unwrap(); +/// let results = search.search(&index, "rust", 10); +/// ``` +#[derive(Debug, Clone)] +pub struct TinySearch { + /// Custom stopwords to use instead of built-in ones + custom_stopwords: Option>, +} + +impl TinySearch { + /// Create a new `TinySearch` instance with default settings + /// + /// The default configuration uses the built-in English stopwords list. + /// + /// # Example + /// + /// ```rust + /// use tinysearch::TinySearch; + /// + /// let search = TinySearch::new(); + /// ``` + pub const fn new() -> Self { + Self { + custom_stopwords: None, + } + } + + /// Configure custom stopwords to filter out during indexing (builder pattern) + /// + /// Stopwords are common words that are typically filtered out during indexing + /// to improve search quality and reduce index size. By default, tinysearch uses + /// a built-in English stopwords list. 
+ /// + /// # Arguments + /// * `stopwords` - Collection of words to exclude from the index + /// + /// # Example + /// + /// ```rust + /// use tinysearch::TinySearch; + /// + /// let search = TinySearch::new() + /// .with_stopwords(vec!["the".to_string(), "and".to_string(), "or".to_string()]); + /// ``` + #[must_use] + pub fn with_stopwords(mut self, stopwords: I) -> Self + where + I: IntoIterator, + { + self.custom_stopwords = Some(stopwords.into_iter().collect()); + self + } + + /// Parse JSON string containing posts into a Vec + /// + /// This method parses JSON in the format expected by tinysearch, where each + /// post is an object with `title`, `url`, and optionally `body` and `meta` fields. + /// + /// # Arguments + /// * `json_str` - JSON string containing an array of post objects + /// + /// # Returns + /// * `Ok(Vec)` - Successfully parsed posts + /// * `Err(serde_json::Error)` - JSON parsing error + /// + /// # Example + /// + /// ```rust + /// use tinysearch::TinySearch; + /// + /// let json = r#"[ + /// { + /// "title": "My Post", + /// "url": "/my-post", + /// "body": "Post content goes here", + /// "meta": {"category": "programming", "author": "John"} + /// } + /// ]"#; + /// + /// let search = TinySearch::new(); + /// let posts = search.parse_posts_from_json(json).unwrap(); + /// ``` + pub fn parse_posts_from_json( + &self, + json_str: &str, + ) -> Result, serde_json::Error> { + serde_json::from_str(json_str) + } + + /// Build a search index from a collection of posts + /// + /// This method takes posts implementing the [`Post`] trait and generates the filters + /// needed for fast search. It handles tokenization, stop word removal, and filter generation. + /// + /// The process involves: + /// 1. Extracting text content from each post (title, body, meta) + /// 2. Tokenizing and cleaning the text (lowercase, remove punctuation) + /// 3. Filtering out stopwords + /// 4. 
Creating Xor filters for efficient membership testing + /// + /// # Arguments + /// * `posts` - Vector of posts implementing the [`Post`] trait + /// + /// # Returns + /// * `Ok(SearchIndex)` - Successfully generated search index + /// * `Err(Box)` - Index generation error + /// + /// # Example + /// + /// ```rust + /// use tinysearch::{BasicPost, TinySearch}; + /// use std::collections::HashMap; + /// + /// let posts = vec![ + /// BasicPost { + /// title: "Hello World".to_string(), + /// url: "/hello".to_string(), + /// body: Some("This is my first post".to_string()), + /// meta: HashMap::new(), + /// } + /// ]; + /// + /// let search = TinySearch::new(); + /// let index = search.build_index(&posts).unwrap(); + /// ``` + pub fn build_index( + &self, + posts: &[P], + ) -> Result> { + let prepared_posts = Self::prepare_posts(posts); + let stopwords = self.get_stopwords(); + Ok(Self::generate_filters(prepared_posts, &stopwords)) + } + + /// Search using a pre-built index + /// + /// This method performs a search query against a pre-built search index, + /// returning results sorted by relevance score. Title matches are weighted + /// higher than body matches to prioritize more relevant results. 
+ /// + /// # Arguments + /// * `index` - Pre-built search index from [`build_index`](Self::build_index) + /// * `query` - Search query string + /// * `num_results` - Maximum number of results to return + /// + /// # Returns + /// Vector of matching [`PostId`] references, sorted by relevance (highest first) + /// + /// # Example + /// + /// ```rust + /// use tinysearch::{BasicPost, TinySearch}; + /// use std::collections::HashMap; + /// + /// let posts = vec![ + /// BasicPost { + /// title: "Rust Guide".to_string(), + /// url: "/rust".to_string(), + /// body: Some("Learn Rust programming".to_string()), + /// meta: HashMap::new(), + /// } + /// ]; + /// let search = TinySearch::new(); + /// let index = search.build_index(&posts).unwrap(); + /// + /// let results = search.search(&index, "rust programming", 5); + /// for result in results { + /// println!("Found: {} at {}", result.title, result.url); + /// } + /// ``` + pub fn search<'index>( + &self, + index: &'index SearchIndex, + query: &str, + num_results: usize, + ) -> Vec<&'index PostId> { + crate::search(index, query, num_results) + } + + /// Build a search index and serialize it to bytes + /// + /// This is a convenience method that combines index building and serialization + /// for easy storage to files or databases. The serialized format uses bincode + /// for efficient binary encoding. 
+ /// + /// # Arguments + /// * `posts` - Vector of posts implementing the [`Post`] trait + /// + /// # Returns + /// * `Ok(Vec)` - Serialized index as bytes + /// * `Err(Box)` - Index generation or serialization error + /// + /// # Example + /// + /// ```rust + /// use tinysearch::{BasicPost, TinySearch}; + /// use std::collections::HashMap; + /// + /// let posts = vec![ + /// BasicPost { + /// title: "My Post".to_string(), + /// url: "/post".to_string(), + /// body: Some("Post content".to_string()), + /// meta: HashMap::new(), + /// } + /// ]; + /// let search = TinySearch::new(); + /// + /// // Build and serialize index + /// let index_bytes = search.build_and_serialize_index(&posts).unwrap(); + /// + /// // You can save to file: std::fs::write("search_index.bin", index_bytes).unwrap(); + /// ``` + pub fn build_and_serialize_index( + &self, + posts: &[P], + ) -> Result, Box> { + let filters = self.build_index(posts)?; + let storage = Storage::from(filters); + storage.to_bytes().map_err(std::convert::Into::into) + } + + /// Load a search index from serialized bytes + /// + /// This method deserializes a previously saved search index from bytes. + /// The index must have been created using [`build_and_serialize_index`](Self::build_and_serialize_index) + /// or compatible serialization. 
+ /// + /// # Arguments + /// * `bytes` - Serialized index bytes + /// + /// # Returns + /// * `Ok(SearchIndex)` - Successfully loaded search index + /// * `Err(BincodeError)` - Deserialization error + /// + /// # Example + /// + /// ```rust + /// use tinysearch::{BasicPost, TinySearch}; + /// use std::collections::HashMap; + /// + /// let search = TinySearch::new(); + /// + /// // First create and serialize an index + /// let posts = vec![ + /// BasicPost { + /// title: "Test".to_string(), + /// url: "/test".to_string(), + /// body: Some("content".to_string()), + /// meta: HashMap::new(), + /// } + /// ]; + /// let index_bytes = search.build_and_serialize_index(&posts).unwrap(); + /// + /// // Then load it back + /// let index = search.load_index_from_bytes(&index_bytes).unwrap(); + /// let results = search.search(&index, "content", 10); + /// ``` + pub fn load_index_from_bytes(&self, bytes: &[u8]) -> Result { + let storage = Storage::from_bytes(bytes)?; + Ok(storage.filters) + } +} + +impl Default for TinySearch { + fn default() -> Self { + Self::new() + } +} + +impl TinySearch { + /// Get the stopwords set to use for this instance + fn get_stopwords(&self) -> HashSet { + self.custom_stopwords.clone().unwrap_or_else(|| { + include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/assets/stopwords")) + .split_whitespace() + .map(String::from) + .collect() + }) + } + + /// Remove non-ascii characters from string + /// Keep apostrophe (e.g. 
for words like "don't") + fn cleanup(s: &str) -> String { + s.replace(|c: char| !(c.is_alphabetic() || c == '\''), " ") + } + + /// Tokenize input text, removing stopwords and normalizing to lowercase + fn tokenize_with_stopwords(words: &str, stopwords: &HashSet) -> HashSet { + Self::cleanup(&strip_markdown(words)) + .split_whitespace() + .filter(|&word| !word.trim().is_empty()) + .map(str::to_lowercase) + .filter(|word| !stopwords.contains(word)) + .collect() + } + + /// Generate filters from prepared posts (internal implementation) + fn generate_filters( + posts: HashMap>, + stopwords: &HashSet, + ) -> SearchIndex { + let split_posts: HashMap>> = posts + .into_iter() + .map(|(post, content)| { + ( + post, + content.map(|content| Self::tokenize_with_stopwords(&content, stopwords)), + ) + }) + .collect(); + + split_posts + .into_iter() + .map(|(post_id, body)| { + // Add title to filter + let title: HashSet = + Self::tokenize_with_stopwords(&post_id.title, stopwords); + + // Add metadata to filter + let metadata: HashSet = if post_id.meta.is_empty() { + HashSet::new() + } else { + Self::tokenize_with_stopwords(&post_id.meta, stopwords) + }; + + let mut content: HashSet = title; + content.extend(metadata); + if let Some(body) = body { + content.extend(body); + } + + let content_vec: Vec = content.into_iter().collect(); + let filter = + HashProxy::::from( + &content_vec, + ); + (post_id, filter) + }) + .collect() + } + + /// Prepare posts for filter generation (internal implementation) + fn prepare_posts(posts: &[P]) -> HashMap> { + posts + .iter() + .map(|post| { + let meta_str = if post.meta().is_empty() { + String::new() + } else { + serde_json::to_string(&post.meta()).unwrap_or_default() + }; + let post_id = PostId { + title: post.title().to_string(), + url: post.url().to_string(), + meta: meta_str, + }; + let body = post.body().map(std::string::ToString::to_string); + (post_id, body) + }) + .collect() + } +} diff --git a/src/bin/tinysearch.rs 
b/src/bin/tinysearch.rs index e7bc4cf..46b98af 100644 --- a/src/bin/tinysearch.rs +++ b/src/bin/tinysearch.rs @@ -44,8 +44,8 @@ enum DirOrTemp { impl DirOrTemp { pub fn path(&self) -> PathBuf { match self { - DirOrTemp::Path(p) => p.clone(), - DirOrTemp::Temp(p) => p.path().to_path_buf(), + Self::Path(p) => p.clone(), + Self::Temp(p) => p.path().to_path_buf(), } } } @@ -60,7 +60,7 @@ impl FromStr for DirOrTemp { type Err = ::Err; fn from_str(s: &str) -> std::result::Result { - Ok(DirOrTemp::Path(PathBuf::from_str(s)?)) + Ok(Self::Path(PathBuf::from_str(s)?)) } } @@ -119,7 +119,7 @@ struct Opt { #[argh(positional)] input_file: Option, - /// output path for WASM module ("wasm_output" directory by default) + /// output path for WASM module ("`wasm_output`" directory by default) #[argh( option, short = 'p', @@ -138,7 +138,7 @@ struct Opt { /// this version will be used in Cargo.toml for the generated crate /// (only used in wasm, crate modes). This should be a valid TOML table definition. - /// Default is 'version="env!("CARGO_PKG_VERSION")"'. If you have a local version of + /// Default is '`version="env!("CARGO_PKG_VERSION`")"'. 
If you have a local version of /// tinysearch, you can specify 'path="/path/to/tinysearch"' #[argh( option, @@ -194,13 +194,15 @@ impl Stage for Search { format!("Failed to read input file: {}", self.storage_file.display()) })?; let filters = Storage::from_bytes(&bytes)?.filters; - let results = base_search(&filters, self.term.clone(), self.num_searches); - results.iter().for_each(|result| { + let results = base_search(&filters, &self.term, self.num_searches); + for result in &results { println!( - "Title: {}, Url: {}, Meta: {:?}", - result.0, result.1, result.2 + "Title: {title}, Url: {url}, Meta: {meta}", + title = result.title, + url = result.url, + meta = result.meta ); - }); + } Ok(()) } } @@ -241,7 +243,7 @@ impl Stage for Storage { let posts: Posts = index::read(raw_content) .with_context(|| format!("Failed to decode {}", self.posts_index.display()))?; - trace!("Generating storage from posts: {:#?}", posts); + trace!("Generating storage from posts: {posts:#?}"); storage::write(posts, &storage_file, &self.schema)?; println!("Storage ready in file {}", storage_file.display()); @@ -331,7 +333,7 @@ impl Wasm { impl Stage for Wasm { fn from_opt(opt: &Opt) -> Result { - let crate_path = Wasm::ensure_crate_path(&opt.crate_path)?; + let crate_path = Self::ensure_crate_path(&opt.crate_path)?; let crate_opt = { let mut ret: Opt = opt.clone(); ret.out_path = crate_path.path(); @@ -347,7 +349,7 @@ impl Stage for Wasm { }) } - fn build(self: &Wasm) -> Result<(), Error> { + fn build(&self) -> Result<(), Error> { self.c.build().context("Failed generating crate")?; println!("Compiling WASM module using vanilla cargo build"); let crate_path = self.crate_path.path(); @@ -404,7 +406,12 @@ impl Stage for Wasm { } } - if !self.release { + if self.release { + println!("Created production-ready WASM module"); + println!("See docs for usage instructions"); + println!("Path: {}", dest_wasm.display()); + println!("Size: {} bytes", dest_wasm.metadata()?.len()); + } else { let 
html_path = self.out_path.join("demo.html"); fs::write( &html_path, @@ -414,11 +421,6 @@ impl Stage for Wasm { println!("All done! WASM module at: {}", dest_wasm.display()); println!("JS loader at: {}", js_path.display()); println!("Demo at: {}", html_path.display()); - } else { - println!("Created production-ready WASM module"); - println!("See docs for usage instructions"); - println!("Path: {}", dest_wasm.display()); - println!("Size: {} bytes", dest_wasm.metadata()?.len()); } Ok(()) } @@ -454,11 +456,11 @@ pub fn main() -> Result<(), Error> { } pub fn run_output(cmd: &mut Command) -> Result { - println!("running {:?}", cmd); + println!("running {cmd:?}"); let output = cmd .stderr(Stdio::inherit()) .output() - .with_context(|| format!("failed to run {:?}", cmd))?; + .with_context(|| format!("failed to run {cmd:?}"))?; if !output.status.success() { anyhow::bail!("failed to execute {:?}\nstatus: {}", cmd, output.status) diff --git a/src/bin/utils/storage.rs b/src/bin/utils/storage.rs index 3695b04..6f42989 100644 --- a/src/bin/utils/storage.rs +++ b/src/bin/utils/storage.rs @@ -6,7 +6,7 @@ use std::path; use super::assets::STOP_WORDS; use super::index::Posts; use strip_markdown::strip_markdown; -use tinysearch::{Filters, PostId, SearchSchema, Storage}; +use tinysearch::{PostId, SearchIndex, SearchSchema, Storage}; use xorf::HashProxy; pub fn write(posts: Posts, path: &path::PathBuf, schema: &SearchSchema) -> Result<(), Error> { @@ -19,7 +19,7 @@ pub fn write(posts: Posts, path: &path::PathBuf, schema: &SearchSchema) -> Resul Ok(()) } -fn build(posts: Posts, schema: &SearchSchema) -> Result { +fn build(posts: Posts, schema: &SearchSchema) -> Result { let posts = prepare_posts(posts, schema); generate_filters(posts) } @@ -41,7 +41,7 @@ fn tokenize(words: &str, stopwords: &HashSet) -> HashSet { // Read all posts and generate Bloomfilters from them. 
#[unsafe(no_mangle)] -pub fn generate_filters(posts: HashMap>) -> Result { +pub fn generate_filters(posts: HashMap>) -> Result { // Create a dictionary of {"post name": "lowercase word set"}. split_posts = // {name: set(re.split("\W+", contents.lower())) for name, contents in // posts.items()} @@ -52,7 +52,7 @@ pub fn generate_filters(posts: HashMap>) -> Result>> = posts .into_iter() .map(|(post, content)| { - debug!("Generating {:?}", post); + debug!("Generating {post:?}"); (post, content.map(|content| tokenize(&content, &stopwords))) }) .collect(); @@ -64,13 +64,24 @@ pub fn generate_filters(posts: HashMap>) -> Result = tokenize(&post_id.0, &stopwords); - let content: Vec = body.map_or_else( - || title.clone().into_iter().collect(), - |body| body.union(&title).cloned().collect(), - ); - let filter = HashProxy::from(&content); + // Add title to filter + let title: HashSet = tokenize(&post_id.title, &stopwords); + + // Add metadata to filter + let metadata: HashSet = if post_id.meta.is_empty() { + HashSet::new() + } else { + tokenize(&post_id.meta, &stopwords) + }; + + let mut content: HashSet = title; + content.extend(metadata); + if let Some(body) = body { + content.extend(body); + } + + let content_vec: Vec = content.into_iter().collect(); + let filter = HashProxy::from(&content_vec); (post_id, filter) }) .collect(); @@ -100,7 +111,7 @@ pub fn prepare_posts(posts: Posts, schema: &SearchSchema) -> HashMap HashMap HashMap; - -/// Represents a post with its title, URL, and optional metadata -pub type PostId = (Title, Url, Meta); +/// Represents a post with its title, URL, and metadata +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct PostId { + /// Post title + pub title: String, + /// Post URL + pub url: String, + /// Serialized metadata string + pub meta: String, +} /// A post with its associated Xor filter for fast lookups pub type PostFilter = (PostId, HashProxy); -/// Collection of all post filters -pub type Filters = Vec; +/// 
A deserialized search index containing posts and their search filters +/// +/// This allows users to store and work with search indexes without +/// needing to import the xorf library directly. +/// +/// # Example +/// +/// ```rust +/// use tinysearch::{BasicPost, TinySearch, SearchIndex}; +/// use std::collections::HashMap; +/// +/// let posts = vec![ +/// BasicPost { +/// title: "My Post".to_string(), +/// url: "/my-post".to_string(), +/// body: Some("Post content here".to_string()), +/// meta: HashMap::new(), +/// } +/// ]; +/// +/// let search = TinySearch::new(); +/// let index: SearchIndex = search.build_index(&posts).unwrap(); +/// let results = search.search(&index, "content", 10); +/// ``` +pub type SearchIndex = Vec; + +// Re-export public API types from the API module +pub use api::{BasicPost, Post, TinySearch}; /// Configuration schema for tinysearch.toml #[cfg(feature = "bin")] @@ -53,7 +117,7 @@ pub struct SearchSchema { impl Default for SearchSchema { /// Default schema configuration matching current JSON structure fn default() -> Self { - SearchSchema { + Self { indexed_fields: vec!["title".to_string(), "body".to_string()], metadata_fields: vec![], url_field: "url".to_string(), @@ -68,7 +132,7 @@ impl SearchSchema { let toml_path = path.as_ref().join("tinysearch.toml"); if !toml_path.exists() { - return Ok(SearchSchema::default()); + return Ok(Self::default()); } let toml_content = std::fs::read_to_string(&toml_path) @@ -76,7 +140,6 @@ impl SearchSchema { let config: SearchSchemaConfig = toml::from_str(&toml_content) .map_err(|e| format!("Failed to parse tinysearch.toml: {e}"))?; - // Validate schema config.schema.validate()?; Ok(config.schema) @@ -125,12 +188,12 @@ impl SearchSchema { #[derive(Serialize, Deserialize)] pub struct Storage { /// Vector of post filters for search functionality - pub filters: Filters, + pub filters: SearchIndex, } -impl From for Storage { - fn from(filters: Filters) -> Self { - Storage { filters } +impl From for Storage 
{ + fn from(filters: SearchIndex) -> Self { + Self { filters } } } @@ -157,8 +220,8 @@ impl Storage { /// Deserializes storage from bytes using bincode pub fn from_bytes(bytes: &[u8]) -> Result { - let decoded: Filters = bincode::deserialize(bytes)?; - Ok(Storage { filters: decoded }) + let decoded: SearchIndex = bincode::deserialize(bytes)?; + Ok(Self { filters: decoded }) } } @@ -170,13 +233,15 @@ const TITLE_WEIGHT: usize = 3; /// Calculates a combined score for a post based on title and body matches /// Post title matches are weighted higher than body matches -fn score(title: &str, search_terms: &[String], filter: &Filter) -> usize { - let title_terms: Vec = tokenize(title); +fn score(post_id: &PostId, search_terms: &[String], filter: &Filter) -> usize { + let title_terms: Vec = tokenize(&post_id.title); let title_score: usize = search_terms .iter() .filter(|term| title_terms.contains(term)) .count(); - TITLE_WEIGHT * title_score + filter.score(search_terms) + TITLE_WEIGHT + .saturating_mul(title_score) + .saturating_add(filter.score(search_terms)) } /// Tokenizes a string into lowercase words, removing empty tokens @@ -191,17 +256,21 @@ fn tokenize(s: &str) -> Vec { /// Performs a search query against the provided filters /// /// # Arguments -/// * `filters` - The search index containing all posts and their filters +/// * `index` - The search index containing all posts and their filters /// * `query` - The search query string /// * `num_results` - Maximum number of results to return /// /// # Returns /// Vector of `PostId` references, sorted by relevance score (highest first) -pub fn search(filters: &'_ Filters, query: String, num_results: usize) -> Vec<&'_ PostId> { - let search_terms: Vec = tokenize(&query); - let mut matches: Vec<(&PostId, usize)> = filters +pub fn search<'index>( + index: &'index SearchIndex, + query: &str, + num_results: usize, +) -> Vec<&'index PostId> { + let search_terms: Vec = tokenize(query); + let mut matches: Vec<(&PostId, usize)> 
= index .iter() - .map(|(post_id, filter)| (post_id, score(&post_id.0, &search_terms, filter))) + .map(|(post_id, filter)| (post_id, score(post_id, &search_terms, filter))) .filter(|(_post_id, score)| *score > 0) .collect(); @@ -212,6 +281,7 @@ pub fn search(filters: &'_ Filters, query: String, num_results: usize) -> Vec<&' #[cfg(test)] #[cfg(feature = "bin")] +#[allow(clippy::panic, clippy::unwrap_used)] mod schema_tests { use super::*; use tempfile::TempDir; @@ -223,7 +293,7 @@ mod schema_tests { assert_eq!(schema.metadata_fields, Vec::::new()); assert_eq!(schema.url_field, "url"); if let Err(e) = schema.validate() { - panic!("Default schema validation failed: {}", e); + panic!("Default schema validation failed: {e}"); } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index b5b0ff0..d502e4c 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,3 +1,11 @@ +#![allow( + clippy::expect_used, + clippy::unwrap_used, + clippy::panic, + clippy::print_stderr, + clippy::missing_docs_in_private_items +)] + use std::process::Command; use tempfile::TempDir; @@ -22,11 +30,10 @@ fn test_cli_wasm_mode() { .expect("Failed to check installed targets"); let installed_targets = String::from_utf8_lossy(&target_check.stdout); - if !installed_targets.contains("wasm32-unknown-unknown") { - panic!( - "wasm32-unknown-unknown target is not installed. Install it with: rustup target add wasm32-unknown-unknown" - ); - } + assert!( + installed_targets.contains("wasm32-unknown-unknown"), + "wasm32-unknown-unknown target is not installed. Install it with: rustup target add wasm32-unknown-unknown" + ); let temp_dir = TempDir::new().expect("Failed to create temp directory"); @@ -53,15 +60,15 @@ fn test_cli_wasm_mode() { if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); let stdout = String::from_utf8_lossy(&output.stdout); - eprintln!("WASM build failed. 
Stdout: {}", stdout); - eprintln!("Stderr: {}", stderr); + eprintln!("WASM build failed. Stdout: {stdout}"); + eprintln!("Stderr: {stderr}"); panic!("WASM build failed unexpectedly"); } // Verify that WASM and JS files were created let wasm_files: Vec<_> = std::fs::read_dir(&temp_dir) .expect("Failed to read output directory") - .filter_map(|entry| entry.ok()) + .filter_map(std::result::Result::ok) .filter(|entry| { entry .path() @@ -106,7 +113,7 @@ fn test_cli_storage_mode() { // Storage mode should work with the provided fixtures if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Command failed: {}", stderr); + eprintln!("Command failed: {stderr}"); } assert!(output.status.success()); @@ -173,8 +180,8 @@ url_field = "permalink" if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); let stdout = String::from_utf8_lossy(&output.stdout); - eprintln!("Custom schema build failed. Stdout: {}", stdout); - eprintln!("Stderr: {}", stderr); + eprintln!("Custom schema build failed. Stdout: {stdout}"); + eprintln!("Stderr: {stderr}"); panic!("Custom schema build failed unexpectedly"); } @@ -273,8 +280,8 @@ url_field = "product_url" if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); let stdout = String::from_utf8_lossy(&output.stdout); - eprintln!("Flexible fields build failed. Stdout: {}", stdout); - eprintln!("Stderr: {}", stderr); + eprintln!("Flexible fields build failed. 
Stdout: {stdout}"); + eprintln!("Stderr: {stderr}"); panic!("Flexible fields build failed unexpectedly"); } From 846b11480b8a438e6591c61f1483d36db0a38f1e Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 1 Sep 2025 13:51:35 +0200 Subject: [PATCH 58/58] Bump version --- Cargo.lock | 2 +- Cargo.toml | 12 ++---------- assets/crate/Cargo.toml.template | 2 +- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 777942e..cc24748 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -334,7 +334,7 @@ dependencies = [ [[package]] name = "tinysearch" -version = "0.9.0" +version = "0.10.0" dependencies = [ "anyhow", "argh", diff --git a/Cargo.toml b/Cargo.toml index e9aab7e..52ffc26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tinysearch" -version = "0.9.0" +version = "0.10.0" authors = ["Matthias Endler "] edition = "2024" rust-version = "1.85" @@ -46,15 +46,7 @@ features = ["serde"] [features] default = [] -bin = [ - "argh", - "log", - "anyhow", - "tempfile", - "strum", - "toml", - "toml_edit", -] +bin = ["argh", "log", "anyhow", "tempfile", "strum", "toml", "toml_edit"] [dev-dependencies] tempfile = "3.14.0" diff --git a/assets/crate/Cargo.toml.template b/assets/crate/Cargo.toml.template index f67e7e4..e179ad1 100644 --- a/assets/crate/Cargo.toml.template +++ b/assets/crate/Cargo.toml.template @@ -3,7 +3,7 @@ [package] name = "THIS_VALUE_SHOULD_BE_FILLED" authors = ["Matthias Endler "] -version = "0.9.0" +version = "0.10.0" edition = "2024" rust-version = "1.85" description = "A tiny search engine for static websites"