diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d9da4b4..dcb1270 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -35,6 +35,12 @@ importers: '@supabase/supabase-js': specifier: ^2.103.0 version: 2.103.0 + '@types/papaparse': + specifier: ^5.5.2 + version: 5.5.2 + '@types/pdf-parse': + specifier: ^1.1.5 + version: 1.1.5 class-transformer: specifier: ^0.5.1 version: 0.5.1 @@ -44,6 +50,15 @@ importers: dotenv: specifier: ^17.4.2 version: 17.4.2 + node-ofx-parser: + specifier: ^0.5.1 + version: 0.5.1 + papaparse: + specifier: ^5.5.3 + version: 5.5.3 + pdf-parse: + specifier: ^2.4.5 + version: 2.4.5 reflect-metadata: specifier: ^0.2.2 version: 0.2.2 @@ -1393,24 +1408,48 @@ packages: cpu: [arm64] os: [android] + '@napi-rs/canvas-android-arm64@0.1.80': + resolution: {integrity: sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [android] + '@napi-rs/canvas-darwin-arm64@0.1.100': resolution: {integrity: sha512-2PcswRaC7Ly645DGt88///zuFDhJxJYdKAs1uU3mfk1atYkXufgcgLfBpk6Tm12nCQBaNt1wpybuPZ4qOhTo8A==} engines: {node: '>= 10'} cpu: [arm64] os: [darwin] + '@napi-rs/canvas-darwin-arm64@0.1.80': + resolution: {integrity: sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [darwin] + '@napi-rs/canvas-darwin-x64@0.1.100': resolution: {integrity: sha512-ePNZtj7pNIva/siZMg+HmbeozkIjqUIYdoymH8HaA3qK7LfzFN4WMBM8G6HQ9ZC+H3+Dnn5pqtiXpgLykaPOhw==} engines: {node: '>= 10'} cpu: [x64] os: [darwin] + '@napi-rs/canvas-darwin-x64@0.1.80': + resolution: {integrity: sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==} + engines: {node: '>= 10'} + cpu: [x64] + os: [darwin] + '@napi-rs/canvas-linux-arm-gnueabihf@0.1.100': resolution: {integrity: sha512-d5cDB48oWFGU8/XPhUOFAlySgb/VAu7D+s8fi55K1Pcfg8aPplHWqMgibhVLU8ky7Pyg/fuiVLz4Nf3JrSTuUA==} engines: {node: '>= 10'} 
cpu: [arm] os: [linux] + '@napi-rs/canvas-linux-arm-gnueabihf@0.1.80': + resolution: {integrity: sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==} + engines: {node: '>= 10'} + cpu: [arm] + os: [linux] + '@napi-rs/canvas-linux-arm64-gnu@0.1.100': resolution: {integrity: sha512-rDxgxRu69RvDlX/bh9o22DxLsGr8EqsNgotL9+RwQE1S0b0cqeatqsw6aW45mukm0B42DIAaAacKaYQ8cqS1nw==} engines: {node: '>= 10'} @@ -1418,6 +1457,13 @@ packages: os: [linux] libc: [glibc] + '@napi-rs/canvas-linux-arm64-gnu@0.1.80': + resolution: {integrity: sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + libc: [glibc] + '@napi-rs/canvas-linux-arm64-musl@0.1.100': resolution: {integrity: sha512-K3mDW66N+xT2/V439u1alFANiBUjdEx2gLiNYnCmUsva5jZMxWTjafBYwTzYK+EMFMHrUoabuU+T1BIP5CgbYQ==} engines: {node: '>= 10'} @@ -1425,6 +1471,13 @@ packages: os: [linux] libc: [musl] + '@napi-rs/canvas-linux-arm64-musl@0.1.80': + resolution: {integrity: sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + libc: [musl] + '@napi-rs/canvas-linux-riscv64-gnu@0.1.100': resolution: {integrity: sha512-mooqUBTIsccZpnoQC4NgrC1v6C1vof39etLNMnBwCY+p0gajWJvAHLGQ6g/gGyS5YrpDW+GefSN4+Cvcr08UWw==} engines: {node: '>= 10'} @@ -1432,6 +1485,13 @@ packages: os: [linux] libc: [glibc] + '@napi-rs/canvas-linux-riscv64-gnu@0.1.80': + resolution: {integrity: sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==} + engines: {node: '>= 10'} + cpu: [riscv64] + os: [linux] + libc: [glibc] + '@napi-rs/canvas-linux-x64-gnu@0.1.100': resolution: {integrity: sha512-1eCvkDCazm7FFhsT7DfGOdSaHgZVK3bt/dSBl5EWHOWmnz+I7j8tPseJqqD81NF+MH21jKUK4wQSDjN0mdhnTg==} engines: {node: '>= 10'} @@ -1439,6 +1499,13 @@ packages: os: [linux] libc: [glibc] + 
'@napi-rs/canvas-linux-x64-gnu@0.1.80': + resolution: {integrity: sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + libc: [glibc] + '@napi-rs/canvas-linux-x64-musl@0.1.100': resolution: {integrity: sha512-20arT6lnI19S68qNlii73TSEDbECNgzMz2EpldC1V3mZFuRkeujXkcebRk0LRJe9SEUAooYiLokfMViY8IX7yA==} engines: {node: '>= 10'} @@ -1446,6 +1513,13 @@ packages: os: [linux] libc: [musl] + '@napi-rs/canvas-linux-x64-musl@0.1.80': + resolution: {integrity: sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + libc: [musl] + '@napi-rs/canvas-win32-arm64-msvc@0.1.100': resolution: {integrity: sha512-DZFFT1wIAg37LJw37yhMRFfjATd3vTQzjZ1Yki8u2vhO6Hi5VE6BVaGQ1aaDu7xb4iMErz+9EOwjpS7xcxFeBw==} engines: {node: '>= 10'} @@ -1458,10 +1532,20 @@ packages: cpu: [x64] os: [win32] + '@napi-rs/canvas-win32-x64-msvc@0.1.80': + resolution: {integrity: sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==} + engines: {node: '>= 10'} + cpu: [x64] + os: [win32] + '@napi-rs/canvas@0.1.100': resolution: {integrity: sha512-xglYA6q3XO5P3BNJYxVZ1IV7DLVjp1Py6nwag88YntrS+3vKHyYcMqXVS4ZztJmwz2uGvz1FWhI/4LgbR5uQDA==} engines: {node: '>= 10'} + '@napi-rs/canvas@0.1.80': + resolution: {integrity: sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==} + engines: {node: '>= 10'} + '@napi-rs/wasm-runtime@0.2.12': resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==} @@ -2106,6 +2190,12 @@ packages: '@types/node@24.12.2': resolution: {integrity: sha512-A1sre26ke7HDIuY/M23nd9gfB+nrmhtYyMINbjI1zHJxYteKR6qSMX56FsmjMcDb3SMcjJg5BiRRgOCC/yBD0g==} + '@types/papaparse@5.5.2': + resolution: {integrity: 
sha512-gFnFp/JMzLHCwRf7tQHrNnfhN4eYBVYYI897CGX4MY1tzY9l2aLkVyx2IlKZ/SAqDbB3I1AOZW5gTMGGsqWliA==} + + '@types/pdf-parse@1.1.5': + resolution: {integrity: sha512-kBfrSXsloMnUJOKi25s3+hRmkycHfLK6A09eRGqF/N8BkQoPUmaCr+q8Cli5FnfohEz/rsv82zAiPz/LXtOGhA==} + '@types/qs@6.15.0': resolution: {integrity: sha512-JawvT8iBVWpzTrz3EGw9BTQFg3BQNmwERdKE22vlTxawwtbyUSlMppvZYKLZzB5zgACXdXxbD3m1bXaMqP/9ow==} @@ -3413,6 +3503,10 @@ packages: fast-uri@3.1.0: resolution: {integrity: sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==} + fast-xml-parser@3.21.1: + resolution: {integrity: sha512-FTFVjYoBOZTJekiUsawGsSYV9QL0A+zDYCRj7y34IO6Jg+2IMYEtQa+bbictpdpV8dHxXywqU7C0gRDEOFtBFg==} + hasBin: true + fastq@1.20.1: resolution: {integrity: sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==} @@ -4572,6 +4666,10 @@ packages: node-int64@0.4.0: resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} + node-ofx-parser@0.5.1: + resolution: {integrity: sha512-YEOf61PPoOt6SvBVMunaxItUBi4TnhODrvc/afoYG8OIN8b63kFJz2u0UcVRcSyyIHOoY/sO+Rf7sA+KgpofJw==} + engines: {node: '>= 0.6.0'} + node-releases@2.0.37: resolution: {integrity: sha512-1h5gKZCF+pO/o3Iqt5Jp7wc9rH3eJJ0+nh/CIoiRwjRxde/hAHyLPXYN4V3CqKAbiZPSeJFSWHmJsbkicta0Eg==} @@ -4679,6 +4777,9 @@ packages: package-json-from-dist@1.0.1: resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} + papaparse@5.5.3: + resolution: {integrity: sha512-5QvjGxYVjxO59MGU2lHVYpRWBBtKHnlIAcSe1uNFCkkptUh63NFRj0FJQm7nR67puEruUci/ZkjmEFrjCAyP4A==} + parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} engines: {node: '>=6'} @@ -4741,6 +4842,11 @@ packages: pathe@2.0.3: resolution: {integrity: 
sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} + pdf-parse@2.4.5: + resolution: {integrity: sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==} + engines: {node: '>=20.16.0 <21 || >=22.3.0'} + hasBin: true + pdfjs-dist@5.4.296: resolution: {integrity: sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==} engines: {node: '>=20.16.0 || >=22.3.0'} @@ -5323,6 +5429,9 @@ packages: resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} engines: {node: '>=8'} + strnum@1.1.2: + resolution: {integrity: sha512-vrN+B7DBIoTTZjnPNewwhx6cBA/H+IS7rfW68n7XxC1y7uoiGQBxaKzqucGUgavX15dJgiGztLJ8vxuEzwqBdA==} + strtok3@10.3.5: resolution: {integrity: sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==} engines: {node: '>=18'} @@ -7528,36 +7637,66 @@ snapshots: '@napi-rs/canvas-android-arm64@0.1.100': optional: true + '@napi-rs/canvas-android-arm64@0.1.80': + optional: true + '@napi-rs/canvas-darwin-arm64@0.1.100': optional: true + '@napi-rs/canvas-darwin-arm64@0.1.80': + optional: true + '@napi-rs/canvas-darwin-x64@0.1.100': optional: true + '@napi-rs/canvas-darwin-x64@0.1.80': + optional: true + '@napi-rs/canvas-linux-arm-gnueabihf@0.1.100': optional: true + '@napi-rs/canvas-linux-arm-gnueabihf@0.1.80': + optional: true + '@napi-rs/canvas-linux-arm64-gnu@0.1.100': optional: true + '@napi-rs/canvas-linux-arm64-gnu@0.1.80': + optional: true + '@napi-rs/canvas-linux-arm64-musl@0.1.100': optional: true + '@napi-rs/canvas-linux-arm64-musl@0.1.80': + optional: true + '@napi-rs/canvas-linux-riscv64-gnu@0.1.100': optional: true + '@napi-rs/canvas-linux-riscv64-gnu@0.1.80': + optional: true + '@napi-rs/canvas-linux-x64-gnu@0.1.100': optional: true + '@napi-rs/canvas-linux-x64-gnu@0.1.80': + optional: true + '@napi-rs/canvas-linux-x64-musl@0.1.100': 
optional: true + '@napi-rs/canvas-linux-x64-musl@0.1.80': + optional: true + '@napi-rs/canvas-win32-arm64-msvc@0.1.100': optional: true '@napi-rs/canvas-win32-x64-msvc@0.1.100': optional: true + '@napi-rs/canvas-win32-x64-msvc@0.1.80': + optional: true + '@napi-rs/canvas@0.1.100': optionalDependencies: '@napi-rs/canvas-android-arm64': 0.1.100 @@ -7573,6 +7712,19 @@ snapshots: '@napi-rs/canvas-win32-x64-msvc': 0.1.100 optional: true + '@napi-rs/canvas@0.1.80': + optionalDependencies: + '@napi-rs/canvas-android-arm64': 0.1.80 + '@napi-rs/canvas-darwin-arm64': 0.1.80 + '@napi-rs/canvas-darwin-x64': 0.1.80 + '@napi-rs/canvas-linux-arm-gnueabihf': 0.1.80 + '@napi-rs/canvas-linux-arm64-gnu': 0.1.80 + '@napi-rs/canvas-linux-arm64-musl': 0.1.80 + '@napi-rs/canvas-linux-riscv64-gnu': 0.1.80 + '@napi-rs/canvas-linux-x64-gnu': 0.1.80 + '@napi-rs/canvas-linux-x64-musl': 0.1.80 + '@napi-rs/canvas-win32-x64-msvc': 0.1.80 + '@napi-rs/wasm-runtime@0.2.12': dependencies: '@emnapi/core': 1.9.2 @@ -8203,6 +8355,14 @@ snapshots: dependencies: undici-types: 7.16.0 + '@types/papaparse@5.5.2': + dependencies: + '@types/node': 24.12.2 + + '@types/pdf-parse@1.1.5': + dependencies: + '@types/node': 24.12.2 + '@types/qs@6.15.0': {} '@types/range-parser@1.2.7': {} @@ -9661,6 +9821,10 @@ snapshots: fast-uri@3.1.0: {} + fast-xml-parser@3.21.1: + dependencies: + strnum: 1.1.2 + fastq@1.20.1: dependencies: reusify: 1.1.0 @@ -10935,6 +11099,10 @@ snapshots: node-int64@0.4.0: {} + node-ofx-parser@0.5.1: + dependencies: + fast-xml-parser: 3.21.1 + node-releases@2.0.37: {} normalize-path@3.0.0: {} @@ -11061,6 +11229,8 @@ snapshots: package-json-from-dist@1.0.1: {} + papaparse@5.5.3: {} + parent-module@1.0.1: dependencies: callsites: 3.1.0 @@ -11110,6 +11280,11 @@ snapshots: pathe@2.0.3: {} + pdf-parse@2.4.5: + dependencies: + '@napi-rs/canvas': 0.1.80 + pdfjs-dist: 5.4.296 + pdfjs-dist@5.4.296: optionalDependencies: '@napi-rs/canvas': 0.1.100 @@ -11792,6 +11967,8 @@ snapshots: 
strip-json-comments@3.1.1: {} + strnum@1.1.2: {} + strtok3@10.3.5: dependencies: '@tokenizer/token': 0.3.0 diff --git a/tehriehlbudget-backend/package.json b/tehriehlbudget-backend/package.json index 976e7e7..270fce5 100644 --- a/tehriehlbudget-backend/package.json +++ b/tehriehlbudget-backend/package.json @@ -1,6 +1,6 @@ { "name": "tehriehlbudget-backend", - "version": "0.3.2", + "version": "0.4.0", "description": "", "author": "", "private": true, @@ -35,9 +35,14 @@ "@nestjs/platform-express": "^11.0.1", "@prisma/client": "^6.19.3", "@supabase/supabase-js": "^2.103.0", + "@types/papaparse": "^5.5.2", + "@types/pdf-parse": "^1.1.5", "class-transformer": "^0.5.1", "class-validator": "^0.15.1", "dotenv": "^17.4.2", + "node-ofx-parser": "^0.5.1", + "papaparse": "^5.5.3", + "pdf-parse": "^2.4.5", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.1" }, @@ -92,6 +97,7 @@ "/node_modules/", "/generated/", ".*\\.module\\.ts$", + ".*\\.d\\.ts$", "src/main\\.ts$" ], "coverageDirectory": "coverage", diff --git a/tehriehlbudget-backend/prisma/migrations/20260527203542_add_transaction_external_id/migration.sql b/tehriehlbudget-backend/prisma/migrations/20260527203542_add_transaction_external_id/migration.sql new file mode 100644 index 0000000..c9d9709 --- /dev/null +++ b/tehriehlbudget-backend/prisma/migrations/20260527203542_add_transaction_external_id/migration.sql @@ -0,0 +1,9 @@ +-- Add external_id column for statement-import dedupe via bank-provided transaction IDs (e.g. OFX FITID). +-- Not encrypted: needs to be indexed for fast lookup, and is an opaque bank ID rather than sensitive PII. +ALTER TABLE "transactions" ADD COLUMN "external_id" TEXT; + +-- Speeds up duplicate detection via externalId (statement re-imports). +CREATE INDEX "transactions_account_id_external_id_idx" ON "transactions"("account_id", "external_id"); + +-- Speeds up the windowed date+amount duplicate-detection query and the existing date-range filters. 
+CREATE INDEX "transactions_account_id_date_idx" ON "transactions"("account_id", "date"); diff --git a/tehriehlbudget-backend/prisma/schema.prisma b/tehriehlbudget-backend/prisma/schema.prisma index fbfb748..c90392f 100644 --- a/tehriehlbudget-backend/prisma/schema.prisma +++ b/tehriehlbudget-backend/prisma/schema.prisma @@ -78,6 +78,7 @@ model Transaction { notes String? date DateTime receiptPath String? @map("receipt_path") + externalId String? @map("external_id") createdAt DateTime @default(now()) @map("created_at") updatedAt DateTime @updatedAt @map("updated_at") @@ -86,6 +87,8 @@ model Transaction { destinationAccount Account? @relation("DestinationAccountTransactions", fields: [destinationAccountId], references: [id], onDelete: Cascade) category Category? @relation(fields: [categoryId], references: [id], onDelete: SetNull) + @@index([accountId, externalId]) + @@index([accountId, date]) @@map("transactions") } diff --git a/tehriehlbudget-backend/src/app.module.ts b/tehriehlbudget-backend/src/app.module.ts index 44b18a3..3b13c04 100644 --- a/tehriehlbudget-backend/src/app.module.ts +++ b/tehriehlbudget-backend/src/app.module.ts @@ -13,6 +13,7 @@ import { AggregationsModule } from './aggregations/aggregations.module'; import { AdvisorModule } from './advisor/advisor.module'; import { ValuationsModule } from './valuations/valuations.module'; import { ActivityLogModule } from './activity-log/activity-log.module'; +import { StatementsModule } from './statements/statements.module'; @Module({ imports: [ @@ -28,6 +29,7 @@ import { ActivityLogModule } from './activity-log/activity-log.module'; AdvisorModule, ValuationsModule, ActivityLogModule, + StatementsModule, ], controllers: [AppController], providers: [AppService], diff --git a/tehriehlbudget-backend/src/statements/dto/parse-statement.dto.spec.ts b/tehriehlbudget-backend/src/statements/dto/parse-statement.dto.spec.ts new file mode 100644 index 0000000..341a145 --- /dev/null +++ 
b/tehriehlbudget-backend/src/statements/dto/parse-statement.dto.spec.ts @@ -0,0 +1,63 @@ +import 'reflect-metadata'; +import { plainToInstance } from 'class-transformer'; +import { validateSync } from 'class-validator'; +import { ParseStatementDto } from './parse-statement.dto'; + +describe('ParseStatementDto', () => { + const baseAccountId = 'a1b2c3d4-1111-4222-9333-1234567890ab'; + + it('accepts a bare accountId', () => { + const dto = plainToInstance(ParseStatementDto, { + accountId: baseAccountId, + }); + expect(validateSync(dto)).toHaveLength(0); + expect(dto.mapping).toBeUndefined(); + }); + + it('parses mapping when sent as a JSON string (multipart form field)', () => { + const dto = plainToInstance(ParseStatementDto, { + accountId: baseAccountId, + mapping: JSON.stringify({ + date: 'When', + description: 'What', + amount: 'How Much', + }), + }); + expect(validateSync(dto)).toHaveLength(0); + expect(dto.mapping?.date).toBe('When'); + expect(dto.mapping?.description).toBe('What'); + expect(dto.mapping?.amount).toBe('How Much'); + }); + + it('drops mapping silently when the JSON string parses to a non-object (e.g. 
/**
 * Caller-supplied override for which CSV header feeds which logical field.
 * Every key is optional; values are header names as they appear in the file.
 */
export interface ColumnMappingDto {
  date?: string;
  description?: string;
  amount?: string;
  debit?: string;
  credit?: string;
  type?: string;
}

/**
 * Coerces the raw `mapping` field into something `@IsObject()` can validate.
 *
 * Non-string values are passed through untouched. A string is treated as
 * JSON (multipart form fields arrive as text): it is returned parsed when the
 * result is a non-null `object`, and replaced by `undefined` when it is
 * malformed or parses to a primitive/null, so `@IsOptional()` then skips it.
 */
function coerceMappingField(value: unknown): unknown {
  if (typeof value !== 'string') {
    return value;
  }
  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return undefined;
  }
  const isNonNullObject = typeof decoded === 'object' && decoded !== null;
  return isNonNullObject ? decoded : undefined;
}

/**
 * Body of the statement-parse request: the target account plus an optional
 * column mapping, which may be sent either as an object or as a JSON string.
 */
export class ParseStatementDto {
  @IsUUID()
  accountId: string;

  @IsOptional()
  @IsObject()
  @Transform(({ value }) => coerceMappingField(value))
  mapping?: ColumnMappingDto;
}
.mockResolvedValueOnce([]); + + const result = await service.classify('user-1', 'acc-1', [ + row({ externalId: 'FITID-A', sourceIndex: 0 }), + row({ externalId: 'FITID-B', sourceIndex: 1, amount: 9, description: 'Diff' }), + ]); + + expect(result.get(0)).toMatchObject({ + status: 'duplicate', + confidence: 1, + duplicateOf: expect.objectContaining({ id: 'existing-1' }), + }); + expect(result.get(1)?.status).toBe('new'); + }); + + it('skips externalId lookup when no rows have one', async () => { + mockPrisma.transaction.findMany.mockResolvedValue([]); + await service.classify('user-1', 'acc-1', [ + row({ sourceIndex: 0 }), + row({ sourceIndex: 1, amount: 5 }), + ]); + // No externalId call. Windowed dedupe and cross-account transfer-pairing + // both run, but no findMany was filtered by externalId. + const calls = mockPrisma.transaction.findMany.mock.calls; + const externalIdCalls = calls.filter( + ([arg]: any[]) => arg?.where?.externalId, + ); + expect(externalIdCalls).toHaveLength(0); + }); + }); + + describe('heuristic match', () => { + it('flags as duplicate when amount + date (±1 day) + similar description match', async () => { + mockPrisma.transaction.findMany + // windowed dedupe call + .mockResolvedValueOnce([ + { + id: 'existing-1', + externalId: null, + accountId: 'acc-1', + amount: 42.1, + date: new Date('2026-04-10T12:00:00Z'), + description: 'COFFEE SHOP #1', + }, + ]) + // cross-account call (no candidates) + .mockResolvedValueOnce([]); + + const result = await service.classify('user-1', 'acc-1', [ + row({ amount: 42.1, date: '2026-04-10', description: 'Coffee Shop #1' }), + ]); + + expect(result.get(0)?.status).toBe('duplicate'); + expect(result.get(0)?.confidence).toBeGreaterThanOrEqual(0.9); + }); + + it('flags as needs_review when amount close and date within ±3 days but description differs', async () => { + mockPrisma.transaction.findMany + .mockResolvedValueOnce([ + { + id: 'existing-2', + externalId: null, + accountId: 'acc-1', + amount: 42.1, + 
date: new Date('2026-04-08T12:00:00Z'), + description: 'Some completely different thing', + }, + ]) + .mockResolvedValueOnce([]); + + const result = await service.classify('user-1', 'acc-1', [ + row({ amount: 42.1, date: '2026-04-10', description: 'Coffee Shop' }), + ]); + + expect(result.get(0)?.status).toBe('needs_review'); + }); + + it('returns new for rows with no match in the window', async () => { + mockPrisma.transaction.findMany + .mockResolvedValueOnce([ + { + id: 'existing-3', + externalId: null, + accountId: 'acc-1', + amount: 9999, + date: new Date('2026-04-10T12:00:00Z'), + description: 'Nope', + }, + ]) + .mockResolvedValueOnce([]); + + const result = await service.classify('user-1', 'acc-1', [ + row({ amount: 42.1 }), + ]); + + expect(result.get(0)?.status).toBe('new'); + }); + }); + + describe('transfer detection', () => { + it('flags possible_transfer when an opposite-sign matching row exists on another account', async () => { + mockPrisma.transaction.findMany + // same-account window (no externalId on this row → no externalId call) + .mockResolvedValueOnce([]) + // cross-account window + .mockResolvedValueOnce([ + { + id: 'other-acc-txn', + accountId: 'acc-2', + amount: 200, + date: new Date('2026-04-10T12:00:00Z'), + description: 'Transfer from checking', + account: { name: 'Savings' }, + }, + ]); + + const result = await service.classify('user-1', 'acc-1', [ + row({ amount: 200, type: 'EXPENSE', description: 'Transfer to savings' }), + ]); + + expect(result.get(0)?.status).toBe('possible_transfer'); + expect(result.get(0)?.transferCandidate).toMatchObject({ + accountId: 'acc-2', + accountName: 'Savings', + matchedTransactionId: 'other-acc-txn', + }); + }); + + it('ignores cross-account candidates with a different amount', async () => { + mockPrisma.transaction.findMany + .mockResolvedValueOnce([]) // windowed + .mockResolvedValueOnce([ + { + id: 'cross-mismatch', + accountId: 'acc-2', + amount: 199, + date: new Date('2026-04-10T12:00:00Z'), + 
description: 'Different', + account: { name: 'Savings' }, + }, + { + id: 'cross-match', + accountId: 'acc-2', + amount: 200, + date: new Date('2026-04-10T12:00:00Z'), + description: 'Right', + account: { name: 'Savings' }, + }, + ]); + const result = await service.classify('user-1', 'acc-1', [ + row({ amount: 200 }), + ]); + expect(result.get(0)?.transferCandidate?.matchedTransactionId).toBe( + 'cross-match', + ); + }); + + it('ignores cross-account candidates with the same accountId (defensive)', async () => { + mockPrisma.transaction.findMany + .mockResolvedValueOnce([]) // windowed + .mockResolvedValueOnce([ + { + id: 'leaked-same-account', + accountId: 'acc-1', + amount: 200, + date: new Date('2026-04-10T12:00:00Z'), + description: 'Should be ignored', + account: { name: 'Self' }, + }, + ]); + const result = await service.classify('user-1', 'acc-1', [ + row({ amount: 200 }), + ]); + expect(result.get(0)?.status).toBe('new'); + }); + + it('does not consider duplicate rows for transfer pairing', async () => { + // Row has externalId, so externalId call happens first and resolves to duplicate. + // No remaining rows for the windowed or cross-account calls. 
+ mockPrisma.transaction.findMany.mockResolvedValueOnce([ + { + id: 'existing-dup', + externalId: 'F-1', + accountId: 'acc-1', + amount: 200, + date: new Date('2026-04-10T12:00:00Z'), + description: 'x', + }, + ]); + + const result = await service.classify('user-1', 'acc-1', [ + row({ externalId: 'F-1', amount: 200 }), + ]); + expect(result.get(0)?.status).toBe('duplicate'); + expect(result.get(0)?.transferCandidate).toBeUndefined(); + expect(mockPrisma.transaction.findMany).toHaveBeenCalledTimes(1); + }); + }); + + describe('returns a status for every row', () => { + it('handles an empty input array without querying', async () => { + const result = await service.classify('user-1', 'acc-1', []); + expect(result.size).toBe(0); + expect(mockPrisma.transaction.findMany).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/tehriehlbudget-backend/src/statements/duplicate-detector.service.ts b/tehriehlbudget-backend/src/statements/duplicate-detector.service.ts new file mode 100644 index 0000000..f971ec2 --- /dev/null +++ b/tehriehlbudget-backend/src/statements/duplicate-detector.service.ts @@ -0,0 +1,289 @@ +import { Injectable } from '@nestjs/common'; +import { PrismaService } from '../prisma/prisma.service'; +import type { ParsedRow } from './parsers/parser.interface'; + +const MS_PER_DAY = 24 * 60 * 60 * 1000; +const SAME_AMOUNT_EPSILON = 0.005; +const NEAR_AMOUNT_EPSILON = 0.02; +const STRICT_DATE_WINDOW_DAYS = 1; +const LOOSE_DATE_WINDOW_DAYS = 3; +const TRANSFER_WINDOW_DAYS = 3; +const STRONG_DESCRIPTION_SIMILARITY = 0.85; +const WEAK_DESCRIPTION_SIMILARITY = 0.7; + +export type ClassificationStatus = + | 'new' + | 'duplicate' + | 'needs_review' + | 'possible_transfer'; + +export interface DuplicateMatch { + id: string; + date: string; + amount: number; + description: string; +} + +export interface TransferCandidate { + accountId: string; + accountName: string; + matchedTransactionId: string; +} + +export interface ClassifiedRow { + status: ClassificationStatus; + 
/**
 * Converts a `YYYY-MM-DD…` string into a Date pinned to 12:00 UTC of that
 * calendar day. Only the first ten characters are read, so any time or
 * offset suffix is ignored.
 *
 * Malformed input does not throw: it yields an Invalid Date (NaN timestamp),
 * which callers must be prepared to handle.
 */
function parseInputDate(date: string): Date {
  const [year = NaN, month = NaN, day = NaN] = date
    .slice(0, 10)
    .split('-')
    .map(Number);
  return new Date(Date.UTC(year, month - 1, day, 12, 0, 0));
}

/** Absolute distance between two instants, in (fractional) days. */
function dayDiff(a: Date, b: Date): number {
  return Math.abs(a.getTime() - b.getTime()) / MS_PER_DAY;
}

/**
 * Reduces a statement description to a comparable key: lower-cased, leading
 * payment-channel words removed, runs of three or more digits (optionally
 * preceded by `#` or `*`) removed, every other non-alphanumeric character
 * turned into a space, and whitespace collapsed.
 *
 * The input is left-trimmed before the channel prefixes are stripped, and
 * stacked prefixes ("POS DEBIT …") are removed together; otherwise the same
 * merchant normalises differently depending on incidental leading whitespace
 * or on how many channel words the bank prepends.
 */
function normalizeDescription(input: string): string {
  return input
    .toLowerCase()
    .trimStart()
    .replace(/^(?:(?:pos|debit|ach|credit|wire)\s+)+/, '')
    .replace(/[#*]?\d{3,}\b/g, '')
    .replace(/[^a-z0-9 ]+/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Jaro-Winkler similarity of two strings, in the range [0, 1].
 *
 * Identical strings (including two empty strings) score 1; if exactly one
 * side is empty, or no characters match, the score is 0. The common-prefix
 * bonus considers at most four characters with a scaling factor of 0.1.
 */
function jaroWinkler(a: string, b: string): number {
  if (a === b) return 1;
  if (a.length === 0 || b.length === 0) return 0;

  // Characters only count as matching when they are at most this far apart.
  const reach = Math.max(
    0,
    Math.floor(Math.max(a.length, b.length) / 2) - 1,
  );
  const matchedInA = new Array<boolean>(a.length).fill(false);
  const matchedInB = new Array<boolean>(b.length).fill(false);

  let matches = 0;
  for (let i = 0; i < a.length; i++) {
    const from = Math.max(0, i - reach);
    const to = Math.min(i + reach + 1, b.length);
    for (let j = from; j < to; j++) {
      if (matchedInB[j] || a[i] !== b[j]) continue;
      matchedInA[i] = true;
      matchedInB[j] = true;
      matches += 1;
      break;
    }
  }
  if (matches === 0) return 0;

  // Walk both strings' matched characters in order; each out-of-order pair
  // is half a transposition.
  let outOfOrder = 0;
  let cursor = 0;
  for (let i = 0; i < a.length; i++) {
    if (!matchedInA[i]) continue;
    while (!matchedInB[cursor]) cursor += 1;
    if (a[i] !== b[cursor]) outOfOrder += 1;
    cursor += 1;
  }
  const transpositions = outOfOrder / 2;

  const jaro =
    (matches / a.length +
      matches / b.length +
      (matches - transpositions) / matches) /
    3;

  let prefix = 0;
  const prefixLimit = Math.min(4, a.length, b.length);
  while (prefix < prefixLimit && a[prefix] === b[prefix]) prefix += 1;

  return jaro + prefix * 0.1 * (1 - jaro);
}
/**
 * The subset of an existing transaction record that this module reads.
 * `amount` is widened because it is always passed through `Number(...)`.
 */
interface ExistingTxn {
  id: string;
  externalId?: string | null;
  amount: number | string | { toString(): string };
  date: Date;
  description: string;
  account?: { name: string };
  accountId?: string;
}

/** Projects an existing transaction onto the summary returned to callers. */
function existingToMatch(t: ExistingTxn): DuplicateMatch {
  return {
    id: t.id,
    date: t.date.toISOString().slice(0, 10),
    amount: Number(t.amount),
    description: t.description,
  };
}

/**
 * Computes the `{ gte, lte }` date filter spanning every row's date, widened
 * by `padDays` on both sides.
 *
 * Uses a plain loop rather than `Math.min(...dates)`: spreading one argument
 * per row can exceed the engine's argument limit on very large imports.
 * Rows whose date does not parse are ignored so a single bad row cannot turn
 * both bounds into an Invalid Date; returns `null` when no row has a usable
 * date, in which case the caller skips the query entirely.
 */
function paddedDateWindow(
  rows: readonly ParsedRow[],
  padDays: number,
): { gte: Date; lte: Date } | null {
  let earliest = Infinity;
  let latest = -Infinity;
  for (const row of rows) {
    const time = parseInputDate(row.date).getTime();
    if (Number.isNaN(time)) continue;
    if (time < earliest) earliest = time;
    if (time > latest) latest = time;
  }
  if (earliest > latest) return null;
  const pad = padDays * MS_PER_DAY;
  return { gte: new Date(earliest - pad), lte: new Date(latest + pad) };
}

/**
 * Classifies parsed statement rows against transactions already stored for
 * the user, so an import can flag likely duplicates and transfers.
 */
@Injectable()
export class DuplicateDetectorService {
  constructor(private prisma: PrismaService) {}

  /**
   * Assigns a classification to every row, keyed by `row.sourceIndex`.
   *
   * Passes run in order and a row is claimed by the first pass that matches:
   *  1. `duplicate` (confidence 1): the row's `externalId` equals that of an
   *     existing transaction on the same account.
   *  2. `duplicate` (0.95): same account, amount within SAME_AMOUNT_EPSILON,
   *     date within STRICT_DATE_WINDOW_DAYS and description similarity of at
   *     least STRONG_DESCRIPTION_SIMILARITY. Otherwise `needs_review` (0.75)
   *     when amount is within NEAR_AMOUNT_EPSILON and date within
   *     LOOSE_DATE_WINDOW_DAYS, preferring the closest date, then the most
   *     similar description.
   *  3. `possible_transfer` (0.85): a transaction on a different account of
   *     the same user has the same amount within TRANSFER_WINDOW_DAYS; the
   *     closest date wins.
   *  4. `new`, carrying the row's own parser confidence.
   *
   * Issues at most three `findMany` queries and none for an empty input.
   *
   * @param userId    owner whose transactions are searched
   * @param accountId account the statement is being imported into
   * @param rows      parsed statement rows
   * @returns map from `sourceIndex` to the row's classification
   */
  async classify(
    userId: string,
    accountId: string,
    rows: ParsedRow[],
  ): Promise<Map<number, ClassifiedRow>> {
    const result = new Map<number, ClassifiedRow>();
    if (rows.length === 0) return result;

    // 1. externalId lookup
    const indicesByExternalId = new Map<string, number[]>();
    for (const r of rows) {
      if (!r.externalId) continue;
      const indices = indicesByExternalId.get(r.externalId);
      if (indices) indices.push(r.sourceIndex);
      else indicesByExternalId.set(r.externalId, [r.sourceIndex]);
    }
    if (indicesByExternalId.size > 0) {
      const existing = ((await this.prisma.transaction.findMany({
        where: {
          userId,
          accountId,
          externalId: { in: Array.from(indicesByExternalId.keys()) },
        },
      })) ?? []) as ExistingTxn[];
      const byExternalId = new Map<string, ExistingTxn>();
      for (const txn of existing) {
        if (txn.externalId != null) byExternalId.set(txn.externalId, txn);
      }
      for (const [externalId, indices] of indicesByExternalId) {
        const match = byExternalId.get(externalId);
        if (!match) continue;
        const duplicateOf = existingToMatch(match);
        for (const idx of indices) {
          result.set(idx, { status: 'duplicate', confidence: 1, duplicateOf });
        }
      }
    }

    // 2. windowed query for same account (heuristic dedupe)
    // NOTE(review): several rows may match the same existing transaction, so
    // two genuine identical purchases on one day can both be flagged against
    // a single stored record. Confirm that is the intended behavior.
    const remaining = rows.filter((r) => !result.has(r.sourceIndex));
    const dedupeWindow = paddedDateWindow(remaining, LOOSE_DATE_WINDOW_DAYS);
    if (dedupeWindow) {
      const candidates = ((await this.prisma.transaction.findMany({
        where: { userId, accountId, date: dedupeWindow },
      })) ?? []) as ExistingTxn[];

      // Per-candidate values do not depend on the row; compute them once
      // instead of once per (row, candidate) pair.
      const prepared = candidates.map((txn) => ({
        txn,
        amount: Number(txn.amount),
        normDesc: normalizeDescription(txn.description),
      }));

      for (const row of remaining) {
        const rowDate = parseInputDate(row.date);
        const rowNormDesc = normalizeDescription(row.description);
        let bestStrong: { match: ExistingTxn; sim: number } | null = null;
        let bestNear: {
          match: ExistingTxn;
          sim: number;
          dDiff: number;
        } | null = null;

        for (const c of prepared) {
          const dDiff = dayDiff(rowDate, c.txn.date);
          const amountDiff = Math.abs(c.amount - row.amount);
          // The strict thresholds are tighter than the loose ones, so a pair
          // failing the loose gate can match neither tier; skip the string
          // comparison. Written as a negation so NaN also falls through.
          if (
            !(
              amountDiff <= NEAR_AMOUNT_EPSILON &&
              dDiff <= LOOSE_DATE_WINDOW_DAYS
            )
          ) {
            continue;
          }
          const sim = jaroWinkler(rowNormDesc, c.normDesc);
          if (
            amountDiff <= SAME_AMOUNT_EPSILON &&
            dDiff <= STRICT_DATE_WINDOW_DAYS &&
            sim >= STRONG_DESCRIPTION_SIMILARITY &&
            (!bestStrong || sim > bestStrong.sim)
          ) {
            bestStrong = { match: c.txn, sim };
          } else if (
            !bestNear ||
            dDiff < bestNear.dDiff ||
            (dDiff === bestNear.dDiff && sim > bestNear.sim)
          ) {
            bestNear = { match: c.txn, sim, dDiff };
          }
        }

        if (bestStrong) {
          result.set(row.sourceIndex, {
            status: 'duplicate',
            confidence: 0.95,
            duplicateOf: existingToMatch(bestStrong.match),
          });
        } else if (bestNear) {
          result.set(row.sourceIndex, {
            status: 'needs_review',
            confidence: 0.75,
            duplicateOf: existingToMatch(bestNear.match),
          });
        }
      }
    }

    // 3. cross-account transfer pairing for rows that didn't dedupe
    // NOTE(review): only amounts are compared here; neither the row's nor the
    // candidate's direction/type is checked. Confirm whether same-direction
    // pairs should be excluded.
    const unmatched = rows.filter((r) => !result.has(r.sourceIndex));
    const transferWindow = paddedDateWindow(unmatched, TRANSFER_WINDOW_DAYS);
    if (transferWindow) {
      const crossCandidates = ((await this.prisma.transaction.findMany({
        where: {
          userId,
          accountId: { not: accountId },
          date: transferWindow,
        },
        include: { account: { select: { name: true } } },
      })) ?? []) as ExistingTxn[];

      const prepared = crossCandidates
        // Defensive: the query already excludes the importing account.
        .filter((txn) => txn.accountId !== accountId)
        .map((txn) => ({ txn, amount: Number(txn.amount) }));

      for (const row of unmatched) {
        const rowDate = parseInputDate(row.date);
        let best: ExistingTxn | null = null;
        let bestDayDiff = Infinity;
        for (const c of prepared) {
          if (Math.abs(c.amount - row.amount) > SAME_AMOUNT_EPSILON) continue;
          const dDiff = dayDiff(rowDate, c.txn.date);
          if (dDiff > TRANSFER_WINDOW_DAYS) continue;
          if (dDiff < bestDayDiff) {
            best = c.txn;
            bestDayDiff = dDiff;
          }
        }
        if (best && best.accountId && best.account?.name) {
          result.set(row.sourceIndex, {
            status: 'possible_transfer',
            confidence: 0.85,
            transferCandidate: {
              accountId: best.accountId,
              accountName: best.account.name,
              matchedTransactionId: best.id,
            },
          });
        }
      }
    }

    // 4. everything else is new
    for (const r of rows) {
      if (!result.has(r.sourceIndex)) {
        result.set(r.sourceIndex, { status: 'new', confidence: r.confidence });
      }
    }

    return result;
  }
}
expect(m.amount).toBeUndefined(); + }); + + it('matches alternate column names (Transaction Date, Payee, Withdrawals, Deposits)', () => { + const m = guessColumnMapping([ + 'Trans. Date', + 'Payee', + 'Withdrawals', + 'Deposits', + ]); + expect(m.date).toBe('Trans. Date'); + expect(m.description).toBe('Payee'); + expect(m.debit).toBe('Withdrawals'); + expect(m.credit).toBe('Deposits'); + }); + + it('does not match unknown headers', () => { + const m = guessColumnMapping(['When', 'What', 'How Much', 'Direction']); + // "When" and "How Much" match aliases we added; "What" matches description. + expect(m.date).toBe('When'); + expect(m.amount).toBe('How Much'); + expect(m.description).toBe('What'); + }); + + it('returns nothing for entirely foreign headers', () => { + const m = guessColumnMapping(['Col1', 'Col2', 'Col3']); + expect(m.date).toBeUndefined(); + expect(m.amount).toBeUndefined(); + expect(m.description).toBeUndefined(); + }); +}); + +describe('isMappingUsable', () => { + it('requires date, description, and either amount or both debit+credit', () => { + expect( + isMappingUsable({ + date: 'Date', + description: 'Desc', + amount: 'Amount', + }), + ).toBe(true); + expect( + isMappingUsable({ + date: 'Date', + description: 'Desc', + debit: 'Debit', + credit: 'Credit', + }), + ).toBe(true); + expect( + isMappingUsable({ date: 'Date', description: 'Desc' }), + ).toBe(false); + expect( + isMappingUsable({ + date: 'Date', + description: 'Desc', + debit: 'Debit', + }), + ).toBe(false); + expect(isMappingUsable({ description: 'Desc', amount: 'Amount' })).toBe( + false, + ); + }); +}); diff --git a/tehriehlbudget-backend/src/statements/parsers/column-mapping.ts b/tehriehlbudget-backend/src/statements/parsers/column-mapping.ts new file mode 100644 index 0000000..17743b7 --- /dev/null +++ b/tehriehlbudget-backend/src/statements/parsers/column-mapping.ts @@ -0,0 +1,77 @@ +import type { ColumnMapping } from './parser.interface'; + +const DATE_PATTERNS = [ + 
/^(posting|transaction|trans\.?|trade)\s*date$/, + /^date(?:\s|$)/, + /^when$/, +]; + +const DESCRIPTION_PATTERNS = [ + /^description$/, + /^payee$/, + /^memo$/, + /^merchant$/, + /^narration$/, + /^details?$/, + /^name$/, + /^what$/, +]; + +const AMOUNT_PATTERNS = [ + /^amount$/, + /^value$/, + /^transaction\s*amount$/, + /^how\s*much$/, +]; + +const DEBIT_PATTERNS = [ + /^debit$/, + /^debit\s*amount$/, + /^withdrawals?$/, + /^money\s*out$/, +]; + +const CREDIT_PATTERNS = [ + /^credit$/, + /^credit\s*amount$/, + /^deposits?$/, + /^money\s*in$/, +]; + +const TYPE_PATTERNS = [/^type$/, /^transaction\s*type$/, /^dr\/?cr$/]; + +function normalize(header: string): string { + return header.trim().toLowerCase(); +} + +function matches(header: string, patterns: RegExp[]): boolean { + const n = normalize(header); + return patterns.some((p) => p.test(n)); +} + +export function guessColumnMapping(headers: string[]): ColumnMapping { + const mapping: ColumnMapping = {}; + for (const header of headers) { + if (!mapping.date && matches(header, DATE_PATTERNS)) { + mapping.date = header; + } else if (!mapping.description && matches(header, DESCRIPTION_PATTERNS)) { + mapping.description = header; + } else if (!mapping.amount && matches(header, AMOUNT_PATTERNS)) { + mapping.amount = header; + } else if (!mapping.debit && matches(header, DEBIT_PATTERNS)) { + mapping.debit = header; + } else if (!mapping.credit && matches(header, CREDIT_PATTERNS)) { + mapping.credit = header; + } else if (!mapping.type && matches(header, TYPE_PATTERNS)) { + mapping.type = header; + } + } + return mapping; +} + +export function isMappingUsable(mapping: ColumnMapping): boolean { + if (!mapping.date || !mapping.description) return false; + const hasAmount = + !!mapping.amount || (!!mapping.debit && !!mapping.credit); + return hasAmount; +} diff --git a/tehriehlbudget-backend/src/statements/parsers/csv.parser.spec.ts b/tehriehlbudget-backend/src/statements/parsers/csv.parser.spec.ts new file mode 100644 
index 0000000..a96896f --- /dev/null +++ b/tehriehlbudget-backend/src/statements/parsers/csv.parser.spec.ts @@ -0,0 +1,333 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { AccountType } from '@prisma/client'; +import { CsvParser } from './csv.parser'; + +jest.mock('@prisma/client', () => ({ + AccountType: { + CHECKING: 'CHECKING', + SAVINGS: 'SAVINGS', + CREDIT: 'CREDIT', + LOAN: 'LOAN', + STOCK: 'STOCK', + CASH: 'CASH', + INVESTMENT: 'INVESTMENT', + RETIREMENT: 'RETIREMENT', + }, +})); + +const fixturesDir = path.join( + __dirname, + '../../../test/fixtures/statements', +); +const loadFixture = (name: string) => + fs.readFileSync(path.join(fixturesDir, name)); + +describe('CsvParser', () => { + const parser = new CsvParser(); + const checkingAccount = { type: AccountType.CHECKING }; + const creditAccount = { type: AccountType.CREDIT }; + + describe('canParse', () => { + it('accepts text/csv mime', () => { + expect(parser.canParse({ buffer: Buffer.from(''), mimetype: 'text/csv' })).toBe(true); + }); + it('accepts .csv extension', () => { + expect( + parser.canParse({ buffer: Buffer.from(''), originalname: 'export.csv' }), + ).toBe(true); + }); + it('rejects pdf mime', () => { + expect( + parser.canParse({ buffer: Buffer.from(''), mimetype: 'application/pdf' }), + ).toBe(false); + }); + }); + + describe('signed-amount CSV (Chase style)', () => { + it('parses negatives as EXPENSE and positives as INCOME', async () => { + const result = await parser.parse( + { buffer: loadFixture('chase-signed.csv'), originalname: 'chase.csv' }, + { account: checkingAccount }, + ); + + expect(result.rows).toHaveLength(4); + const [r0, r1, r2, r3] = result.rows; + + expect(r0.date).toBe('2026-04-02'); + expect(r0.amount).toBe(42.1); + expect(r0.type).toBe('EXPENSE'); + expect(r0.description).toContain('AMZN'); + + expect(r1.amount).toBe(2500); + expect(r1.type).toBe('INCOME'); + expect(r1.description).toMatch(/payroll/i); + + expect(r2.amount).toBe(6.75); + 
expect(r2.type).toBe('EXPENSE'); + + expect(r3.amount).toBe(150); + expect(r3.type).toBe('EXPENSE'); + + for (const r of result.rows) { + expect(r.amount).toBeGreaterThan(0); + expect(r.confidence).toBeGreaterThanOrEqual(0.9); + } + }); + }); + + describe('debit/credit columns (BoA style)', () => { + it('uses Debit column → EXPENSE, Credit column → INCOME', async () => { + const result = await parser.parse( + { buffer: loadFixture('boa-debit-credit.csv'), originalname: 'boa.csv' }, + { account: checkingAccount }, + ); + + expect(result.rows).toHaveLength(4); + const [r0, r1, r2, r3] = result.rows; + expect(r0.amount).toBe(82.41); + expect(r0.type).toBe('EXPENSE'); + expect(r1.amount).toBe(15.0); + expect(r1.type).toBe('INCOME'); + expect(r2.amount).toBe(38.2); + expect(r2.type).toBe('EXPENSE'); + expect(r3.amount).toBe(3.42); + expect(r3.type).toBe('INCOME'); + }); + }); + + describe('positive-only amount on a credit-card account (Amex style)', () => { + it('treats positive rows as EXPENSE (charges) and negative as INCOME (payments) on CREDIT accounts', async () => { + const result = await parser.parse( + { buffer: loadFixture('amex-credit.csv'), originalname: 'amex.csv' }, + { account: creditAccount }, + ); + + expect(result.rows).toHaveLength(4); + const [r0, , r2] = result.rows; + expect(r0.amount).toBe(52.1); + expect(r0.type).toBe('EXPENSE'); + // The autopay payment line is a negative number on the statement. 
+ expect(r2.amount).toBe(450); + expect(r2.type).toBe('INCOME'); + }); + }); + + describe('unknown headers', () => { + it('returns needsMapping when guess is not usable', async () => { + const result = await parser.parse( + { + buffer: Buffer.from( + 'Col1,Col2,Col3\nfoo,bar,baz\nqux,quux,corge\n', + ), + originalname: 'weird.csv', + }, + { account: checkingAccount }, + ); + + expect(result.rows).toHaveLength(0); + expect(result.needsMapping).toBeDefined(); + expect(result.needsMapping!.headers).toEqual(['Col1', 'Col2', 'Col3']); + expect(result.needsMapping!.sample.length).toBeGreaterThan(0); + }); + + it('uses an explicit mapping when provided', async () => { + const buf = Buffer.from( + 'When,What,How Much,Direction\n2026-04-02,Coffee Shop,4.50,out\n2026-04-03,Side Gig,200.00,in\n', + ); + const result = await parser.parse( + { buffer: buf, originalname: 'custom.csv' }, + { + account: checkingAccount, + mapping: { + date: 'When', + description: 'What', + amount: 'How Much', + type: 'Direction', + }, + }, + ); + + expect(result.rows).toHaveLength(2); + expect(result.rows[0].description).toBe('Coffee Shop'); + expect(result.rows[0].amount).toBe(4.5); + expect(result.rows[0].type).toBe('EXPENSE'); + expect(result.rows[1].type).toBe('INCOME'); + }); + }); + + describe('edge cases', () => { + it('strips a UTF-8 BOM before parsing', async () => { + const csv = 'Date,Description,Amount\n2026-04-02,Coffee,-3.50\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'bom.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].amount).toBe(3.5); + }); + + it('handles semicolon-delimited CSV (EU banks)', async () => { + const csv = + 'Date;Description;Amount\n2026-04-02;Bäckerei;-5,40\n2026-04-03;Gehalt;1500,00\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'eu.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(2); + 
expect(result.rows[0].amount).toBeCloseTo(5.4, 2); + expect(result.rows[1].amount).toBe(1500); + }); + + it('skips rows missing required fields rather than crashing', async () => { + const csv = + 'Date,Description,Amount\n2026-04-02,Good,-10\n,Missing date,-20\n2026-04-04,Missing amount,\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'partial.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.warnings.length).toBeGreaterThan(0); + }); + + it('returns an empty result on an empty CSV without crashing', async () => { + const result = await parser.parse( + { buffer: Buffer.from(''), originalname: 'empty.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(0); + }); + + it('parses EU-format dates (DD.MM.YYYY)', async () => { + const csv = 'Date,Description,Amount\n02.04.2026,EU Coffee,-3.50\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'eu.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].date).toBe('2026-04-02'); + }); + + it('falls back to Date parsing for non-standard date formats', async () => { + const csv = 'Date,Description,Amount\nApr 2 2026,Coffee,-3.50\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'date.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].date).toBe('2026-04-02'); + }); + + it('warns when a row has an unparseable date', async () => { + const csv = 'Date,Description,Amount\nnot-a-date,Mystery,-5\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'bad.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(0); + expect(result.warnings.length).toBeGreaterThan(0); + }); + + it('accepts two-digit US-format years', async () => { + const csv = 'Date,Description,Amount\n4/2/26,Coffee,-3.50\n'; 
+ const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'short.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].date).toBe('2026-04-02'); + }); + + it('uses an explicit Dr/Cr type column override on a positive amount', async () => { + const csv = + 'Date,Description,Amount,Dr/Cr\n2026-04-02,Refund,15.00,CR\n2026-04-03,Coffee,3.50,DR\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'drcr.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(2); + expect(result.rows[0].type).toBe('INCOME'); + expect(result.rows[1].type).toBe('EXPENSE'); + }); + + it('rejects rows with unparseable amounts gracefully', async () => { + const csv = 'Date,Description,Amount\n2026-04-02,Coffee,not-a-number\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'badamt.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(0); + expect(result.warnings.length).toBeGreaterThan(0); + }); + + it('handles a thousands-separated amount like 1,234.56', async () => { + const csv = 'Date,Description,Amount\n2026-04-02,Big Coffee,"-1,234.56"\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'big.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].amount).toBe(1234.56); + }); + + it('handles application/vnd.ms-excel as a CSV mimetype', () => { + expect( + parser.canParse({ + buffer: Buffer.from(''), + mimetype: 'application/vnd.ms-excel', + }), + ).toBe(true); + }); + + it('parses EU thousands-separated amounts (1.234,56 form)', async () => { + const csv = + 'Date;Description;Amount\n2026-04-02;Big Coffee;-1.234,56\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'eu-thousands.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + 
expect(result.rows[0].amount).toBeCloseTo(1234.56, 2); + }); + + it('parses comma-only amounts with no decimals as integers', async () => { + const csv = 'Date,Description,Amount\n2026-04-02,Big Coffee,"1,234"\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'int.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].amount).toBe(1234); + }); + + it('defaults to INCOME (low confidence) on assets when type column has unknown value', async () => { + const csv = + 'Date,Description,Amount,Type\n2026-04-02,Mystery,99.00,UNKNOWN\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'unk.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].type).toBe('INCOME'); + expect(result.rows[0].confidence).toBeLessThan(0.9); + }); + + it('skips debit/credit rows where both columns are empty', async () => { + const csv = 'Date,Description,Debit,Credit\n2026-04-02,No money,,\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'empty.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(0); + }); + + it('skips zero-amount rows in signed mode', async () => { + const csv = 'Date,Description,Amount\n2026-04-02,Free,0.00\n'; + const result = await parser.parse( + { buffer: Buffer.from(csv), originalname: 'zero.csv' }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(0); + }); + }); +}); diff --git a/tehriehlbudget-backend/src/statements/parsers/csv.parser.ts b/tehriehlbudget-backend/src/statements/parsers/csv.parser.ts new file mode 100644 index 0000000..e8c2920 --- /dev/null +++ b/tehriehlbudget-backend/src/statements/parsers/csv.parser.ts @@ -0,0 +1,265 @@ +import * as Papa from 'papaparse'; +import { AccountType } from '@prisma/client'; +import { + ColumnMapping, + ParseOptions, + ParseResult, + ParsedRow, + ParserFileInput, + 
/**
 * Type keywords at or below this length ("in", "cr", "dr", "out", "fee") must
 * match a whole token; as substrings they fire inside unrelated words
 * ("po-in-t of sale", "check-in-g withdrawal").
 */
const SHORT_KEYWORD_MAX_LENGTH = 3;

/** Drops a leading UTF-8 byte-order mark so it does not end up in the first header. */
function stripBom(input: string): string {
  return input.charCodeAt(0) === 0xfeff ? input.slice(1) : input;
}

/** Collapses every whitespace run to a single space and trims both ends. */
function normalizeWhitespace(value: string): string {
  return value.replace(/\s+/g, ' ').trim();
}

/**
 * Formats a calendar date as zero-padded `YYYY-MM-DD`.
 *
 * @returns null when the month/day combination does not exist (month 13,
 *   30 February, ...), so callers skip the row instead of storing a bogus date.
 */
function toIsoDate(year: number, month: number, day: number): string | null {
  if (![year, month, day].every(Number.isInteger)) return null;
  if (month < 1 || month > 12 || day < 1) return null;
  const isLeap = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysInMonth =
    month === 2 ? (isLeap ? 29 : 28) : [4, 6, 9, 11].includes(month) ? 30 : 31;
  if (day > daysInMonth) return null;
  const pad = (n: number, width: number): string => String(n).padStart(width, '0');
  return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
}

/** Two-digit years are read as 20xx; anything else is taken literally. */
function expandYear(digits: string): number {
  return digits.length === 2 ? 2000 + Number(digits) : Number(digits);
}

/**
 * Parses a statement date into `YYYY-MM-DD`.
 *
 * Recognised, in order: `YYYY-MM-DD...`, US `M/D/YY(YY)`, EU `D.M.YY(YY)`, and
 * finally anything the JS `Date` constructor accepts (e.g. "Apr 2 2026").
 *
 * @returns The ISO date, or null when the input is empty, unparseable, or
 *   names a day that does not exist.
 */
function parseDate(input: string): string | null {
  const trimmed = input.trim();
  if (!trimmed) return null;

  const iso = trimmed.match(/^(\d{4})-(\d{2})-(\d{2})/);
  if (iso) {
    const [, y = '', m = '', d = ''] = iso;
    return toIsoDate(Number(y), Number(m), Number(d));
  }

  const us = trimmed.match(/^(\d{1,2})\/(\d{1,2})\/(\d{2,4})/);
  if (us) {
    const [, m = '', d = '', y = ''] = us;
    return toIsoDate(expandYear(y), Number(m), Number(d));
  }

  const eu = trimmed.match(/^(\d{1,2})\.(\d{1,2})\.(\d{2,4})/);
  if (eu) {
    const [, d = '', m = '', y = ''] = eu;
    return toIsoDate(expandYear(y), Number(m), Number(d));
  }

  const fallback = new Date(trimmed);
  if (isNaN(fallback.getTime())) return null;
  // Free-form strings without a zone are parsed as local time, so they must be
  // read back with local getters; UTC getters shift the day in UTC+ zones.
  // Only strings that explicitly say UTC are read back in UTC.
  const statedAsUtc = /(?:z|gmt|utc)\s*$/i.test(trimmed);
  return statedAsUtc
    ? toIsoDate(
        fallback.getUTCFullYear(),
        fallback.getUTCMonth() + 1,
        fallback.getUTCDate(),
      )
    : toIsoDate(
        fallback.getFullYear(),
        fallback.getMonth() + 1,
        fallback.getDate(),
      );
}

/**
 * Parses a monetary cell into a signed number.
 *
 * Handles currency symbols and spaces, US ("1,234.56") and EU ("1.234,56")
 * grouping, accounting-style negatives in parentheses ("(42.10)"), and the
 * Unicode minus sign.
 *
 * @returns The value, or null when the cell is empty or not numeric.
 */
function parseNumber(input: string): number | null {
  let cleaned = input.replace(/[$£€\s]/g, '').replace(/\u2212/g, '-');
  if (!cleaned) return null;

  // Accounting notation: a value wrapped in parentheses is negative.
  let negate = false;
  const wrapped = cleaned.match(/^\((.*)\)$/);
  if (wrapped) {
    negate = true;
    cleaned = wrapped[1] ?? '';
  }

  const hasComma = cleaned.includes(',');
  const hasDot = cleaned.includes('.');
  let normalized = cleaned;
  if (hasComma && hasDot) {
    // Both separators present: whichever comes last is the decimal mark.
    normalized =
      cleaned.lastIndexOf(',') > cleaned.lastIndexOf('.')
        ? cleaned.replace(/\./g, '').replace(',', '.')
        : cleaned.replace(/,/g, '');
  } else if (hasComma) {
    // Comma only: a decimal mark if there is exactly one with 1-2 digits after
    // it, otherwise thousands grouping.
    const parts = cleaned.split(',');
    const fraction = parts[1] ?? '';
    normalized =
      parts.length === 2 && fraction.length <= 2
        ? `${parts[0]}.${fraction}`
        : cleaned.replace(/,/g, '');
  } else if (hasDot && cleaned.split('.').length > 2) {
    // Several dots cannot all be decimal marks: EU grouping without decimals.
    normalized = cleaned.replace(/\./g, '');
  }

  const n = parseFloat(normalized);
  if (isNaN(n)) return null;
  return negate ? -n : n;
}

/**
 * Reads the direction stated by a type column value.
 *
 * Keywords longer than {@link SHORT_KEYWORD_MAX_LENGTH} match as substrings
 * (so "ACH_CREDIT" still hits "credit"); shorter ones must equal a whole
 * alphanumeric token.
 *
 * @returns true for money in, false for money out, null when the value is
 *   missing or matches no keyword. Positive keywords are checked first.
 */
function rowHasPositiveType(typeValue: string | undefined): boolean | null {
  if (!typeValue) return null;
  const lower = typeValue.toLowerCase().trim();
  const tokens = new Set(lower.split(/[^a-z0-9]+/));
  const hits = (keyword: string): boolean =>
    keyword.length > SHORT_KEYWORD_MAX_LENGTH
      ? lower.includes(keyword)
      : tokens.has(keyword);

  if (POSITIVE_TYPE_KEYWORDS.some(hits)) return true;
  if (NEGATIVE_TYPE_KEYWORDS.some(hits)) return false;
  return null;
}

interface InterpretedAmount {
  /** Absolute value of the row's amount. */
  amount: number;
  /** Sign-based direction (or the type column's, for positive amounts). */
  positive: boolean;
  confidence: number;
  /** Direction stated by the row's type column; null when absent or unrecognised. */
  typeHint: boolean | null;
}

/**
 * Extracts the amount and its direction from one raw CSV row.
 *
 * @returns null when the row carries no usable, non-zero amount.
 */
function interpretAmount(
  row: Record<string, string>,
  mapping: ColumnMapping,
): InterpretedAmount | null {
  // Debit/Credit pair takes precedence — it's unambiguous.
  if (mapping.debit && mapping.credit) {
    const debit = parseNumber(row[mapping.debit] ?? '');
    const credit = parseNumber(row[mapping.credit] ?? '');
    if (debit) {
      return { amount: Math.abs(debit), positive: false, confidence: 0.98, typeHint: null };
    }
    if (credit) {
      return { amount: Math.abs(credit), positive: true, confidence: 0.98, typeHint: null };
    }
    return null;
  }

  if (!mapping.amount) return null;
  const raw = parseNumber(row[mapping.amount] ?? '');
  if (raw === null || raw === 0) return null;

  const typeHint = mapping.type ? rowHasPositiveType(row[mapping.type]) : null;
  if (raw < 0) {
    return { amount: -raw, positive: false, confidence: 0.95, typeHint };
  }
  if (typeHint !== null) {
    return { amount: raw, positive: typeHint, confidence: 0.95, typeHint };
  }
  // No usable type column — the default depends on the account type and is
  // settled in deriveType; flag the guess with a lower confidence.
  return { amount: raw, positive: true, confidence: 0.7, typeHint: null };
}

/**
 * Turns an interpreted amount into INCOME/EXPENSE for the given account.
 *
 * With debit/credit columns, or on asset accounts, `interp.positive` maps
 * straight to INCOME. With a single signed amount column on a liability
 * account (CREDIT/LOAN) the sign is inverted, because many card statements
 * list charges as positives — unless the row's type column stated the
 * direction explicitly, in which case that statement wins and is not inverted
 * (a "Sale" is an expense whatever its sign).
 */
function deriveType(
  interp: InterpretedAmount,
  accountType: AccountType,
  hasAmountOnly: boolean,
): { type: 'INCOME' | 'EXPENSE'; confidence: number } {
  const isLiability = LIABILITY_TYPES.includes(accountType);
  const isIncome =
    hasAmountOnly && isLiability
      ? interp.typeHint ?? !interp.positive
      : interp.positive;
  return {
    type: isIncome ? 'INCOME' : 'EXPENSE',
    confidence: interp.confidence,
  };
}

/**
 * Picks the candidate delimiter that splits the first line into the most
 * fields; ties and delimiter-free input fall back to a comma.
 */
function detectDelimiter(sample: string): string {
  // Limit 1: only the header line is needed, so don't split the whole file.
  const firstLine = sample.split(/\r?\n/, 1)[0] ?? '';
  const candidates = [',', ';', '\t', '|'];
  let best = ',';
  let bestCount = 0;
  for (const c of candidates) {
    const count = firstLine.split(c).length;
    if (count > bestCount) {
      bestCount = count;
      best = c;
    }
  }
  return best;
}

/** Statement parser for delimited text exports (CSV and friends). */
export class CsvParser implements StatementParser {
  format = 'csv' as const;

  /** Accepts CSV mime types (including the Excel one) or a `.csv` file name. */
  canParse(file: ParserFileInput): boolean {
    if (file.mimetype === 'text/csv') return true;
    if (file.mimetype === 'application/vnd.ms-excel') return true;
    if (file.originalname?.toLowerCase().endsWith('.csv')) return true;
    return false;
  }

  /**
   * Parses the file into normalized rows.
   *
   * Uses `options.mapping` when given, otherwise a guess from the headers. If
   * the mapping is not usable, no rows are produced and `needsMapping` carries
   * the headers, up to five sample rows and the attempted guess. Rows lacking
   * a date, description or non-zero amount are skipped with a warning;
   * `sourceIndex` is the row's position among the parsed data rows.
   */
  async parse(
    file: ParserFileInput,
    options: ParseOptions,
  ): Promise<ParseResult> {
    const raw = stripBom(file.buffer.toString('utf8'));
    if (!raw.trim()) {
      return { rows: [], warnings: [] };
    }

    const delimiter = detectDelimiter(raw);
    const parsed = Papa.parse<Record<string, string>>(raw, {
      header: true,
      skipEmptyLines: true,
      delimiter,
    });

    const headers = parsed.meta.fields ?? [];
    const rawRows = parsed.data ?? [];

    const warnings: string[] = [];

    const mapping = options.mapping ?? guessColumnMapping(headers);
    if (!isMappingUsable(mapping)) {
      return {
        rows: [],
        warnings,
        needsMapping: {
          headers,
          sample: rawRows.slice(0, 5).map((r) => headers.map((h) => r[h] ?? '')),
          guess: mapping,
        },
      };
    }

    const rows: ParsedRow[] = [];
    const hasAmountOnly = !!mapping.amount && !(mapping.debit && mapping.credit);

    rawRows.forEach((row, i) => {
      const date = parseDate((mapping.date ? row[mapping.date] : '') ?? '');
      const description = mapping.description
        ? normalizeWhitespace(row[mapping.description] ?? '')
        : '';
      const interp = interpretAmount(row, mapping);

      if (!date || !description || !interp) {
        warnings.push(
          `Row ${i + 1}: skipped — missing date, description, or amount`,
        );
        return;
      }
      const derived = deriveType(interp, options.account.type, hasAmountOnly);
      rows.push({
        sourceIndex: i,
        date,
        // Round to cents to shed float noise from parsing.
        amount: Math.round(interp.amount * 100) / 100,
        type: derived.type,
        description,
        rawMemo: description,
        confidence: derived.confidence,
      });
    });

    return { rows, warnings };
  }
}
parser.canParse({ buffer: Buffer.from(''), originalname: 'foo.ofx' }), + ).toBe(true); + expect( + parser.canParse({ buffer: Buffer.from(''), originalname: 'foo.qfx' }), + ).toBe(true); + }); + it('rejects unrelated content', () => { + expect( + parser.canParse({ buffer: Buffer.from('Date,Description,Amount\n') }), + ).toBe(false); + }); + }); + + describe('OFX 1.x SGML', () => { + it('extracts STMTTRN rows with FITID as externalId', async () => { + const result = await parser.parse( + { buffer: loadFixture('sample-v1.ofx'), originalname: 'sample.ofx' }, + { account: checkingAccount }, + ); + + expect(result.rows).toHaveLength(3); + const [r0, r1, r2] = result.rows; + expect(r0.date).toBe('2026-04-02'); + expect(r0.amount).toBe(42.1); + expect(r0.type).toBe('EXPENSE'); + expect(r0.externalId).toBe('20260402-001'); + expect(r0.description).toContain('AMZN'); + + expect(r1.amount).toBe(2500); + expect(r1.type).toBe('INCOME'); + expect(r1.externalId).toBe('20260403-001'); + + expect(r2.amount).toBe(6.75); + expect(r2.type).toBe('EXPENSE'); + expect(r2.description).toMatch(/STARBUCKS/); + // MEMO should be captured as rawMemo + expect(r2.rawMemo).toBe('Coffee'); + }); + }); + + describe('OFX 2.x XML', () => { + it('extracts transactions from XML-format OFX', async () => { + const result = await parser.parse( + { buffer: loadFixture('sample-v2.ofx'), originalname: 'sample-v2.ofx' }, + { account: savingsAccount }, + ); + + expect(result.rows).toHaveLength(2); + expect(result.rows[0].type).toBe('INCOME'); + expect(result.rows[0].amount).toBe(3.42); + expect(result.rows[1].type).toBe('EXPENSE'); + expect(result.rows[1].amount).toBe(500); + }); + }); + + describe('QFX (credit card) — CCSTMTRS branch', () => { + it('inverts sign for credit-card statements: negative = charge (EXPENSE), positive = payment (INCOME)', async () => { + const result = await parser.parse( + { + buffer: loadFixture('sample-credit-card.qfx'), + originalname: 'cc.qfx', + }, + { account: creditAccount 
}, + ); + + expect(result.rows).toHaveLength(2); + const [charge, payment] = result.rows; + // -52.10 on a credit card = charge = EXPENSE + expect(charge.amount).toBe(52.1); + expect(charge.type).toBe('EXPENSE'); + // 450 positive on a credit card = autopay payment = INCOME + expect(payment.amount).toBe(450); + expect(payment.type).toBe('INCOME'); + }); + }); + + describe('robustness', () => { + it('handles a single STMTTRN (not an array) gracefully', async () => { + const single = `OFXHEADER:100 +DATA:OFXSGML + + + + + + +CHECKING + + + +DEBIT +20260402 +-10.00 +SINGLE-1 +Single transaction + + + + + +`; + const result = await parser.parse( + { buffer: Buffer.from(single) }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].externalId).toBe('SINGLE-1'); + }); + + it('skips entries without dates or amounts and logs a warning', async () => { + const broken = `OFXHEADER:100 +DATA:OFXSGML + + + + + + +CHECKING + + + +DEBIT +20260402 +-10.00 +OK-1 +OK row + + +DEBIT +BROKEN-1 +Missing date and amount + + + + + +`; + const result = await parser.parse( + { buffer: Buffer.from(broken) }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + expect(result.warnings.length).toBeGreaterThan(0); + }); + + it('throws a friendly error when content is not OFX at all', async () => { + await expect( + parser.parse( + { buffer: Buffer.from('not really ofx') }, + { account: checkingAccount }, + ), + ).rejects.toThrow(/OFX/); + }); + }); +}); diff --git a/tehriehlbudget-backend/src/statements/parsers/ofx.parser.ts b/tehriehlbudget-backend/src/statements/parsers/ofx.parser.ts new file mode 100644 index 0000000..93d42d1 --- /dev/null +++ b/tehriehlbudget-backend/src/statements/parsers/ofx.parser.ts @@ -0,0 +1,137 @@ +import { AccountType } from '@prisma/client'; +import { + ParseOptions, + ParseResult, + ParsedRow, + ParserFileInput, + StatementParser, +} from './parser.interface'; + +const ofxLib: { parse: (data: 
/** Wraps a lone value in an array; missing values become an empty array. */
function asArray<T>(value: T | T[] | undefined): T[] {
  if (value === undefined || value === null) return [];
  return Array.isArray(value) ? value : [value];
}

/**
 * Extracts the calendar date from an OFX timestamp.
 *
 * @returns `YYYY-MM-DD` taken from the leading eight digits, or null when the
 *   value is missing or does not start with them. Time and zone are ignored.
 */
function parseOfxDate(raw: string | undefined): string | null {
  if (!raw) return null;
  const cleaned = String(raw).trim();
  // OFX dates are YYYYMMDD[HHMMSS[.XXX]][TZ]
  const match = cleaned.match(/^(\d{4})(\d{2})(\d{2})/);
  if (!match) return null;
  return `${match[1]}-${match[2]}-${match[3]}`;
}

/**
 * Parses an OFX amount into a signed number.
 *
 * OFX allows either a period or a comma as the decimal mark, and a bare
 * `parseFloat` would silently truncate "-42,10" to -42. When both marks
 * appear, the last one is taken as the decimal mark and the other is dropped
 * as grouping.
 *
 * @returns The value, or null when it is missing or not numeric.
 */
function parseAmount(raw: string | undefined): number | null {
  if (raw === undefined || raw === null) return null;
  const text = String(raw).trim();
  const lastComma = text.lastIndexOf(',');
  const lastDot = text.lastIndexOf('.');
  let normalized = text;
  if (lastComma > lastDot) {
    normalized = text.replace(/\./g, '').replace(',', '.');
  } else if (lastComma !== -1) {
    normalized = text.replace(/,/g, '');
  }
  const n = parseFloat(normalized);
  return isNaN(n) ? null : n;
}

/** Stringifies, collapses whitespace runs and trims; missing input becomes ''. */
function normalizeString(input: string | undefined): string {
  if (!input) return '';
  return String(input).replace(/\s+/g, ' ').trim();
}

/**
 * Collects every BANKTRANLIST node from a parsed OFX document, covering both
 * bank (STMTRS) and credit-card (CCSTMTRS) statement responses. Each response
 * container may be a single object or an array.
 */
function findTransactionLists(parsed: any): any[] {
  const lists: any[] = [];
  const ofx = parsed?.OFX;
  if (!ofx) return lists;

  // Bank statement
  for (const resp of asArray<any>(ofx.BANKMSGSRSV1?.STMTTRNRS)) {
    const stmt = resp?.STMTRS;
    if (stmt?.BANKTRANLIST) lists.push(stmt.BANKTRANLIST);
  }
  // Credit-card statement
  for (const resp of asArray<any>(ofx.CREDITCARDMSGSRSV1?.CCSTMTTRNRS)) {
    const stmt = resp?.CCSTMTRS;
    if (stmt?.BANKTRANLIST) lists.push(stmt.BANKTRANLIST);
  }
  return lists;
}

/** Statement parser for OFX / QFX files (SGML 1.x and XML 2.x). */
export class OfxParser implements StatementParser {
  format = 'ofx' as const;

  /**
   * Accepts `.ofx` / `.qfx` file names, or content whose first 256 bytes
   * contain an `OFXHEADER` line or an `<?OFX` processing instruction.
   */
  canParse(file: ParserFileInput): boolean {
    const name = file.originalname?.toLowerCase() ?? '';
    if (name.endsWith('.ofx') || name.endsWith('.qfx')) return true;
    // subarray: same view semantics as the deprecated Buffer#slice.
    const head = file.buffer.subarray(0, 256).toString('utf8');
    if (/OFXHEADER\s*[:=]/i.test(head)) return true;
    if (/<\?OFX\b/i.test(head)) return true;
    return false;
  }

  /**
   * Parses every STMTTRN entry into a normalized row.
   *
   * Entries without a date, amount or description (NAME, else PAYEE.NAME) are
   * skipped with a warning but still consume a `sourceIndex`, so indices track
   * the position in the file. FITID becomes `externalId`, MEMO becomes `rawMemo`.
   *
   * @throws Error when the underlying library cannot read the content, or the
   *   parsed document has no OFX root.
   */
  async parse(
    file: ParserFileInput,
    options: ParseOptions,
  ): Promise<ParseResult> {
    const raw = file.buffer.toString('utf8');
    let parsed: any;
    try {
      parsed = ofxLib.parse(raw);
    } catch {
      throw new Error(
        'Unable to read this OFX/QFX file. The file may be corrupt or in an unsupported format.',
      );
    }
    if (!parsed?.OFX || typeof parsed.OFX !== 'object') {
      throw new Error(
        'This file does not appear to be an OFX/QFX statement.',
      );
    }

    const isLiability = LIABILITY_TYPES.includes(options.account.type);
    const lists = findTransactionLists(parsed);
    const rows: ParsedRow[] = [];
    const warnings: string[] = [];

    let sourceIndex = 0;
    for (const list of lists) {
      for (const t of asArray<any>(list.STMTTRN)) {
        const date = parseOfxDate(t.DTPOSTED);
        const amountRaw = parseAmount(t.TRNAMT);
        const description =
          normalizeString(t.NAME) || normalizeString(t.PAYEE?.NAME);
        if (!date || amountRaw === null || !description) {
          warnings.push(
            `Skipped transaction at position ${sourceIndex + 1}: missing date, amount, or description`,
          );
          sourceIndex += 1;
          continue;
        }

        const absAmount = Math.abs(amountRaw);
        const positive = amountRaw > 0;
        let type: 'INCOME' | 'EXPENSE';
        if (isLiability) {
          // Credit-card semantics: negative TRNAMT = charge (EXPENSE), positive = payment (INCOME).
          type = positive ? 'INCOME' : 'EXPENSE';
        } else {
          // Asset accounts use the same sign convention, so both arms map identically.
          type = positive ? 'INCOME' : 'EXPENSE';
        }

        rows.push({
          sourceIndex,
          date,
          // Round to cents to shed float noise from parsing.
          amount: Math.round(absAmount * 100) / 100,
          type,
          description,
          externalId: t.FITID ? String(t.FITID).trim() : undefined,
          rawMemo: t.MEMO ? normalizeString(t.MEMO) : undefined,
          confidence: 0.98,
        });
        sourceIndex += 1;
      }
    }

    return { rows, warnings };
  }
}
CREDIT: 'CREDIT', + LOAN: 'LOAN', + }, +})); + +describe('PdfParser', () => { + const parser = new PdfParser(); + const checkingAccount = { type: AccountType.CHECKING }; + const creditAccount = { type: AccountType.CREDIT }; + + afterEach(() => { + __setPdfParseForTesting(null); + }); + + describe('canParse', () => { + it('accepts PDF mime, .pdf extension, and %PDF magic bytes', () => { + expect( + parser.canParse({ + buffer: Buffer.from(''), + mimetype: 'application/pdf', + }), + ).toBe(true); + expect( + parser.canParse({ buffer: Buffer.from(''), originalname: 'foo.pdf' }), + ).toBe(true); + expect(parser.canParse({ buffer: Buffer.from('%PDF-1.4\n') })).toBe(true); + }); + + it('rejects non-PDF content', () => { + expect( + parser.canParse({ + buffer: Buffer.from('Date,Amount\n'), + mimetype: 'text/csv', + }), + ).toBe(false); + }); + }); + + describe('text extraction', () => { + it('detects rows by date+amount patterns and normalizes them', async () => { + __setPdfParseForTesting({ + parse: async () => ({ + text: `Chase Statement +04/02/2026 AMZN MKTP US*ABC123 -42.10 +04/03/2026 DIRECT DEPOSIT PAYROLL 2500.00 +04/04/2026 STARBUCKS #4321 -6.75`, + }), + }); + + const result = await parser.parse( + { buffer: Buffer.from('%PDF-1.4'), originalname: 'chase.pdf' }, + { account: checkingAccount }, + ); + + expect(result.rows).toHaveLength(3); + const [r0, r1, r2] = result.rows; + expect(r0.date).toBe('2026-04-02'); + expect(r0.amount).toBe(42.1); + expect(r0.type).toBe('EXPENSE'); + expect(r0.description).toMatch(/AMZN/); + expect(r0.confidence).toBeLessThan(0.9); + + expect(r1.amount).toBe(2500); + expect(r1.type).toBe('INCOME'); + + expect(r2.amount).toBe(6.75); + }); + + it('parses YYYY-MM-DD and "Apr 2, 2026" date formats', async () => { + __setPdfParseForTesting({ + parse: async () => ({ + text: `2026-04-02 Coffee Shop -3.50 +Apr 3, 2026 Refund 15.00`, + }), + }); + const result = await parser.parse( + { buffer: Buffer.from('%PDF-1.4') }, + { account: 
checkingAccount }, + ); + expect(result.rows).toHaveLength(2); + expect(result.rows[0].date).toBe('2026-04-02'); + expect(result.rows[1].date).toBe('2026-04-03'); + expect(result.rows[1].type).toBe('INCOME'); + }); + + it('honors trailing CR/DR markers (common in some statements)', async () => { + __setPdfParseForTesting({ + parse: async () => ({ + text: `04/02/2026 Coffee Shop 25.00 DR +04/03/2026 Refund 10.00 CR`, + }), + }); + const result = await parser.parse( + { buffer: Buffer.from('%PDF-1.4') }, + { account: checkingAccount }, + ); + expect(result.rows[0].type).toBe('EXPENSE'); + expect(result.rows[1].type).toBe('INCOME'); + }); + + it('inverts sign semantics on credit-card accounts', async () => { + __setPdfParseForTesting({ + parse: async () => ({ + text: `04/02/2026 RESTAURANT XYZ 52.10 +04/15/2026 AUTOPAY PAYMENT - THANK YOU -450.00`, + }), + }); + const result = await parser.parse( + { buffer: Buffer.from('%PDF-1.4') }, + { account: creditAccount }, + ); + // 52.10 positive on a credit card = charge = EXPENSE + expect(result.rows[0].type).toBe('EXPENSE'); + // -450 on a credit card = payment = INCOME + expect(result.rows[1].type).toBe('INCOME'); + }); + + it('handles thousands-separated amounts', async () => { + __setPdfParseForTesting({ + parse: async () => ({ + text: `04/02/2026 LARGE BILL 1,234.56`, + }), + }); + const result = await parser.parse( + { buffer: Buffer.from('%PDF-1.4') }, + { account: checkingAccount }, + ); + expect(result.rows[0].amount).toBeCloseTo(1234.56, 2); + }); + + it('skips lines without an obvious date or amount', async () => { + __setPdfParseForTesting({ + parse: async () => ({ + text: `Statement Period +Page 1 of 2 +04/02/2026 Coffee Shop -3.50 +Account Number: 1234`, + }), + }); + const result = await parser.parse( + { buffer: Buffer.from('%PDF-1.4') }, + { account: checkingAccount }, + ); + expect(result.rows).toHaveLength(1); + }); + }); + + describe('error cases', () => { + it('throws a friendly error on scanned 
/** True for account types whose balance is a debt. Evaluated at call time. */
function isLiabilityAccount(type: AccountType): boolean {
  return type === AccountType.CREDIT || type === AccountType.LOAN;
}

/** Three-letter month abbreviation (lower case) → two-digit month number. */
const MONTH_NUMBERS: Record<string, string> = {
  jan: '01', feb: '02', mar: '03', apr: '04', may: '05', jun: '06',
  jul: '07', aug: '08', sep: '09', oct: '10', nov: '11', dec: '12',
};

// Date patterns we recognize at the start of a transaction line.
const DATE_PATTERNS: { regex: RegExp; toIso: (m: RegExpMatchArray) => string }[] = [
  {
    // 2026-04-02
    regex: /^(\d{4})-(\d{2})-(\d{2})/,
    toIso: (m) => `${m[1]}-${m[2]}-${m[3]}`,
  },
  {
    // 4/2/26 or 04/02/2026 (month first). The year must be exactly two or
    // four digits; two-digit years are placed in the 2000s.
    regex: /^(\d{1,2})\/(\d{1,2})\/(\d{4}|\d{2})(?!\d)/,
    toIso: (m) => {
      const yyyy = m[3].length === 2 ? `20${m[3]}` : m[3];
      return `${yyyy}-${m[1].padStart(2, '0')}-${m[2].padStart(2, '0')}`;
    },
  },
  {
    // "Apr 2, 2026", "Apr. 2 2026", "April 2, 2026" — the capture group is
    // always the three-letter prefix.
    regex:
      /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+(\d{1,2}),?\s+(\d{4})/i,
    toIso: (m) =>
      `${m[3]}-${MONTH_NUMBERS[m[1].toLowerCase()]}-${m[2].padStart(2, '0')}`,
  },
];

/** Rejects strings such as "2026-13-45" that are shaped like a date but are not one. */
function isValidIsoDate(iso: string): boolean {
  const [year, month, day] = iso.split('-').map(Number);
  const probe = new Date(Date.UTC(year, month - 1, day));
  return (
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day
  );
}

// Amount at the end of a line: optional sign, optional `$`, digits either
// grouped with thousands separators or plain, optional two decimals, optional
// trailing CR/DR marker. The sign and `$` apply to both digit forms, so
// "-2500.00" keeps its minus sign. The lookbehind stops a match from starting
// in the middle of a longer number.
const AMOUNT_REGEX =
  /(?<![\d.,])(-?\$?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?)(\s*(CR|DR))?\s*$/i;

/** Strips `$`, commas and whitespace, then parses; returns `null` if not numeric. */
function parseAmount(input: string): number | null {
  const cleaned = input.replace(/[$,\s]/g, '');
  const n = parseFloat(cleaned);
  return isNaN(n) ? null : n;
}

interface PdfTextResult {
  text: string;
  pages?: unknown[];
}

type PdfTextExtractor = { parse: (buffer: Buffer) => Promise<PdfTextResult> };

let cachedPdfParse: PdfTextExtractor | null = null;

/**
 * Returns the (cached) text extractor backed by the `pdf-parse` package.
 *
 * @throws Error when the package exposes neither a `PDFParse` class nor a
 *   callable export.
 */
async function loadPdfParser(): Promise<PdfTextExtractor> {
  if (cachedPdfParse) return cachedPdfParse;
  // Lazy-load via require so tests can mock it. The library is an ES module
  // but ships a CJS build; we resolve through the package main.
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const mod = require('pdf-parse');
  let loaded: PdfTextExtractor;
  // pdf-parse 2.x exposes `PDFParse` class; older 1.x exported a default
  // function. Support both for resilience.
  if (mod?.PDFParse) {
    loaded = {
      parse: async (buffer: Buffer) => {
        const parser = new mod.PDFParse({ data: buffer });
        try {
          const result = await parser.getText();
          return { text: result?.text ?? '' };
        } finally {
          // Release the parser even when text extraction fails; a failure
          // while cleaning up must not mask the real outcome.
          try {
            await parser.destroy();
          } catch {
            /* ignore */
          }
        }
      },
    };
  } else if (typeof mod === 'function' || typeof mod?.default === 'function') {
    const fn = typeof mod === 'function' ? mod : mod.default;
    loaded = {
      parse: async (buffer: Buffer) => {
        const result = await fn(buffer);
        return { text: result?.text ?? '' };
      },
    };
  } else {
    throw new Error('PDF parser library is not available.');
  }
  cachedPdfParse = loaded;
  return loaded;
}

// Test seam — lets specs swap in a fake parser without touching require().
export function __setPdfParseForTesting(
  fake: { parse: (buffer: Buffer) => Promise<PdfTextResult> } | null,
): void {
  cachedPdfParse = fake;
}

interface ExtractedLine {
  date: string;
  amountRaw: string;
  description: string;
}

/**
 * Scans extracted PDF text line by line and keeps the lines that look like
 * transactions: a recognized, valid date at the start, an amount at the end,
 * and non-empty text in between. Everything else is skipped.
 *
 * `amountRaw` is the matched amount plus any trailing CR/DR marker.
 */
function extractTransactionLines(text: string): ExtractedLine[] {
  const out: ExtractedLine[] = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) continue;

    let date: string | null = null;
    let rest = line;
    for (const { regex, toIso } of DATE_PATTERNS) {
      const match = line.match(regex);
      if (match) {
        const iso = toIso(match);
        if (isValidIsoDate(iso)) {
          date = iso;
          rest = line.slice(match[0].length).trim();
        }
        break;
      }
    }
    if (!date) continue;

    const amountMatch = rest.match(AMOUNT_REGEX);
    if (!amountMatch) continue;
    const description = rest.slice(0, amountMatch.index).trim();
    if (!description) continue;
    out.push({
      date,
      amountRaw: amountMatch[1] + (amountMatch[2] ?? ''),
      description,
    });
  }
  return out;
}

/** Best-effort statement parser for text-based PDFs. */
export class PdfParser implements StatementParser {
  format = 'pdf' as const;

  /** Accepts the PDF mime type, a `.pdf` file name, or `%PDF` magic bytes. */
  canParse(file: ParserFileInput): boolean {
    if (file.mimetype === 'application/pdf') return true;
    if (file.originalname?.toLowerCase().endsWith('.pdf')) return true;
    return file.buffer.subarray(0, 4).toString('ascii') === '%PDF';
  }

  /**
   * Extracts text from the PDF and turns transaction-looking lines into rows.
   *
   * Sign handling: a trailing CR/DR marker wins over the numeric sign. On
   * credit/loan accounts a positive amount is an EXPENSE and a negative or
   * CR-marked one is INCOME; on other accounts it is the reverse. Rows whose
   * amount is zero or unparseable are reported in `warnings`.
   *
   * @throws Error when the library fails, the PDF has no text, or no
   *   transaction lines are found.
   */
  async parse(
    file: ParserFileInput,
    options: ParseOptions,
  ): Promise<ParseResult> {
    const lib = await loadPdfParser();
    let extracted: PdfTextResult;
    try {
      extracted = await lib.parse(file.buffer);
    } catch (err) {
      throw new Error(
        err instanceof Error
          ? `Could not read this PDF: ${err.message}`
          : 'Could not read this PDF.',
      );
    }
    const text = extracted.text ?? '';
    if (!text.trim()) {
      throw new Error(
        'This PDF appears to be a scanned image. Please use the CSV or OFX/QFX export from your bank instead.',
      );
    }

    const lines = extractTransactionLines(text);
    if (lines.length === 0) {
      throw new Error(
        'No transactions detected in this PDF. Try the CSV or OFX/QFX export from your bank for more reliable parsing.',
      );
    }

    const isLiability = isLiabilityAccount(options.account.type);
    const rows: ParsedRow[] = [];
    const warnings: string[] = [];

    lines.forEach((line, sourceIndex) => {
      const trailingMarker = line.amountRaw.match(/(CR|DR)$/i)?.[1]?.toUpperCase();
      const cleanedAmount = line.amountRaw.replace(/(CR|DR)$/i, '');
      const num = parseAmount(cleanedAmount);
      if (num === null || num === 0) {
        warnings.push(`Row ${sourceIndex + 1}: could not parse amount`);
        return;
      }
      let positive: boolean;
      if (trailingMarker === 'CR') positive = true;
      else if (trailingMarker === 'DR') positive = false;
      else positive = num > 0;

      // Credit-card / loan: positive amount = charge = EXPENSE (debt grows);
      // negative or CR-marked amount = payment/refund = INCOME.
      const type: 'INCOME' | 'EXPENSE' =
        positive === isLiability ? 'EXPENSE' : 'INCOME';

      rows.push({
        sourceIndex,
        date: line.date,
        amount: Math.round(Math.abs(num) * 100) / 100,
        type,
        description: line.description,
        rawMemo: line.description,
        confidence: 0.6,
      });
    });

    return { rows, warnings };
  }
}
/** Largest statement upload accepted by the `file` interceptor (10 MiB). */
const STATEMENT_FILE_SIZE_LIMIT = 10 * 1024 * 1024;

/** HTTP entry point for statement uploads; every route is behind `AuthGuard`. */
@Controller('statements')
@UseGuards(AuthGuard)
export class StatementsController {
  constructor(private readonly statementsService: StatementsService) {}

  /**
   * `POST /statements/parse` — hands the uploaded `file` part plus the
   * account id and optional column mapping from the body to the service.
   *
   * Deliberately not `async`: a missing file throws synchronously.
   *
   * @throws BadRequestException when no file part was uploaded.
   */
  @Post('parse')
  @UseInterceptors(
    FileInterceptor('file', {
      limits: { fileSize: STATEMENT_FILE_SIZE_LIMIT },
    }),
  )
  parse(
    @CurrentUser() user: User,
    @UploadedFile() file: Express.Multer.File | undefined,
    @Body() body: ParseStatementDto,
  ) {
    if (!file) {
      throw new BadRequestException('No file uploaded');
    }
    // Forward only the three fields the parsers use, not the whole Multer object.
    const { buffer, mimetype, originalname } = file;
    const { accountId, mapping } = body;
    return this.statementsService.parse(
      user.id,
      { buffer, mimetype, originalname },
      accountId,
      mapping,
    );
  }
}

/** Format-specific parsers registered as providers alongside the services. */
const STATEMENT_PARSER_PROVIDERS = [CsvParser, OfxParser, PdfParser];

/** Wires the statements controller, service, duplicate detector and parsers. */
@Module({
  imports: [AuthModule],
  controllers: [StatementsController],
  providers: [
    StatementsService,
    DuplicateDetectorService,
    ...STATEMENT_PARSER_PROVIDERS,
  ],
})
export class StatementsModule {}
from './parsers/ofx.parser'; +import { PdfParser } from './parsers/pdf.parser'; +import { AccountType } from '@prisma/client'; + +jest.mock('@prisma/client', () => ({ + PrismaClient: class {}, + AccountType: { + CHECKING: 'CHECKING', + SAVINGS: 'SAVINGS', + CREDIT: 'CREDIT', + LOAN: 'LOAN', + }, +})); + +describe('StatementsService', () => { + let service: StatementsService; + const account = { + id: 'acc-1', + name: 'Chase Checking', + type: AccountType.CHECKING, + }; + const mockPrisma: any = { + account: { findFirst: jest.fn() }, + }; + const mockDedupe: any = { + classify: jest.fn(), + }; + + const stubParser = (over: any = {}) => ({ + format: 'csv', + canParse: jest.fn(() => false), + parse: jest.fn(async () => ({ rows: [], warnings: [] })), + ...over, + }); + + let csv: any; + let ofx: any; + let pdf: any; + + beforeEach(async () => { + jest.resetAllMocks(); + csv = stubParser({ format: 'csv' }); + ofx = stubParser({ format: 'ofx' }); + pdf = stubParser({ format: 'pdf' }); + + const module: TestingModule = await Test.createTestingModule({ + providers: [ + StatementsService, + { provide: PrismaService, useValue: mockPrisma }, + { provide: DuplicateDetectorService, useValue: mockDedupe }, + { provide: CsvParser, useValue: csv }, + { provide: OfxParser, useValue: ofx }, + { provide: PdfParser, useValue: pdf }, + ], + }).compile(); + service = module.get(StatementsService); + }); + + it('rejects an empty buffer', async () => { + await expect( + service.parse('u1', { buffer: Buffer.alloc(0) }, 'acc-1'), + ).rejects.toThrow(BadRequestException); + }); + + it('rejects when the account is not owned by the user', async () => { + mockPrisma.account.findFirst.mockResolvedValue(null); + await expect( + service.parse('u1', { buffer: Buffer.from('x') }, 'acc-1'), + ).rejects.toThrow(NotFoundException); + }); + + it('rejects when no parser can handle the file', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + csv.canParse.mockReturnValue(false); + 
ofx.canParse.mockReturnValue(false); + pdf.canParse.mockReturnValue(false); + await expect( + service.parse( + 'u1', + { buffer: Buffer.from('not a statement'), mimetype: 'text/plain' }, + 'acc-1', + ), + ).rejects.toThrow(/Unsupported file format/); + }); + + it('dispatches to the first parser that can handle the file', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + ofx.canParse.mockReturnValue(true); + ofx.parse.mockResolvedValue({ + rows: [ + { + sourceIndex: 0, + date: '2026-04-10', + amount: 50, + type: 'EXPENSE', + description: 'Test', + confidence: 0.98, + }, + ], + warnings: [], + }); + mockDedupe.classify.mockResolvedValue( + new Map([[0, { status: 'new', confidence: 0.98 }]]), + ); + + const result = await service.parse( + 'u1', + { buffer: Buffer.from('OFXHEADER:100'), originalname: 'x.ofx' }, + 'acc-1', + ); + + expect(result.format).toBe('ofx'); + expect(result.account).toEqual(account); + expect(result.rows).toHaveLength(1); + expect(result.rows[0].status).toBe('new'); + expect(csv.parse).not.toHaveBeenCalled(); + expect(pdf.parse).not.toHaveBeenCalled(); + }); + + it('returns needsMapping without running dedupe', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + csv.canParse.mockReturnValue(true); + csv.parse.mockResolvedValue({ + rows: [], + warnings: ['some warning'], + needsMapping: { headers: ['A', 'B'], sample: [['1', '2']], guess: {} }, + }); + + const result = await service.parse( + 'u1', + { buffer: Buffer.from('A,B\n1,2'), originalname: 'x.csv' }, + 'acc-1', + ); + + expect(result.needsMapping).toBeDefined(); + expect(result.rows).toHaveLength(0); + expect(mockDedupe.classify).not.toHaveBeenCalled(); + }); + + it('rewrites parser errors as BadRequestException with the original message', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + pdf.canParse.mockReturnValue(true); + pdf.parse.mockRejectedValue(new Error('Scanned PDF — not supported')); + + await expect( + 
service.parse( + 'u1', + { buffer: Buffer.from('%PDF-1.4'), originalname: 'x.pdf' }, + 'acc-1', + ), + ).rejects.toThrow(/Scanned PDF/); + }); + + it('passes mapping through to the parser', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + csv.canParse.mockReturnValue(true); + csv.parse.mockResolvedValue({ rows: [], warnings: [] }); + mockDedupe.classify.mockResolvedValue(new Map()); + + const mapping = { date: 'When', description: 'What', amount: 'How Much' }; + await service.parse( + 'u1', + { buffer: Buffer.from('x'), originalname: 'x.csv' }, + 'acc-1', + mapping, + ); + + expect(csv.parse).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ mapping, account }), + ); + }); + + it('defaults each row to new when the classifier returns no entry for it', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + csv.canParse.mockReturnValue(true); + csv.parse.mockResolvedValue({ + rows: [ + { + sourceIndex: 0, + date: '2026-04-10', + amount: 25, + type: 'EXPENSE', + description: 'Coffee', + confidence: 0.7, + }, + ], + warnings: [], + }); + // Classifier returns an empty map — code must fall back to row.confidence. 
+ mockDedupe.classify.mockResolvedValue(new Map()); + + const result = await service.parse( + 'u1', + { buffer: Buffer.from('x'), originalname: 'x.csv' }, + 'acc-1', + ); + expect(result.rows[0].status).toBe('new'); + expect(result.rows[0].confidence).toBe(0.7); + }); + + it('uses a generic message when a parser throws a non-Error value', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + csv.canParse.mockReturnValue(true); + csv.parse.mockRejectedValue('string-not-an-error'); + await expect( + service.parse( + 'u1', + { buffer: Buffer.from('x'), originalname: 'x.csv' }, + 'acc-1', + ), + ).rejects.toThrow(/Unable to read/); + }); + + it('merges duplicate-detector status, confidence, and metadata into each row', async () => { + mockPrisma.account.findFirst.mockResolvedValue(account); + csv.canParse.mockReturnValue(true); + csv.parse.mockResolvedValue({ + rows: [ + { + sourceIndex: 0, + date: '2026-04-10', + amount: 25, + type: 'EXPENSE', + description: 'Coffee', + confidence: 0.95, + }, + { + sourceIndex: 1, + date: '2026-04-11', + amount: 200, + type: 'EXPENSE', + description: 'Transfer', + confidence: 0.95, + }, + ], + warnings: [], + }); + mockDedupe.classify.mockResolvedValue( + new Map([ + [ + 0, + { + status: 'duplicate', + confidence: 1, + duplicateOf: { + id: 'existing-1', + date: '2026-04-10', + amount: 25, + description: 'Coffee', + }, + }, + ], + [ + 1, + { + status: 'possible_transfer', + confidence: 0.85, + transferCandidate: { + accountId: 'acc-2', + accountName: 'Savings', + matchedTransactionId: 'cross-1', + }, + }, + ], + ]), + ); + + const result = await service.parse( + 'u1', + { buffer: Buffer.from('x'), originalname: 'x.csv' }, + 'acc-1', + ); + + expect(result.rows[0].status).toBe('duplicate'); + expect(result.rows[0].duplicateOf?.id).toBe('existing-1'); + expect(result.rows[1].status).toBe('possible_transfer'); + expect(result.rows[1].transferCandidate?.accountName).toBe('Savings'); + }); +}); diff --git 
/** One parsed statement row merged with its duplicate/transfer classification. */
export interface ParsedTransactionResponse {
  sourceIndex: number;
  date: string;
  amount: number;
  type: 'INCOME' | 'EXPENSE';
  description: string;
  externalId?: string;
  status: ClassifiedRow['status'];
  confidence: number;
  duplicateOf?: ClassifiedRow['duplicateOf'];
  transferCandidate?: ClassifiedRow['transferCandidate'];
}

/** Result of parsing an uploaded statement for one account. */
export interface ParseStatementResponse {
  format: 'csv' | 'ofx' | 'pdf';
  account: { id: string; name: string; type: string };
  rows: ParsedTransactionResponse[];
  warnings: string[];
  needsMapping?: ParseResult['needsMapping'];
}

/** Picks a parser for an uploaded statement, runs it, and classifies the rows. */
@Injectable()
export class StatementsService {
  private readonly parsers: StatementParser[];

  constructor(
    private prisma: PrismaService,
    private duplicateDetector: DuplicateDetectorService,
    csv: CsvParser,
    ofx: OfxParser,
    pdf: PdfParser,
  ) {
    // Order matters: OFX is sniffed first (most specific), then CSV, then PDF.
    this.parsers = [ofx, csv, pdf];
  }

  /**
   * Parses `file` as a statement for the user's account `accountId`.
   *
   * When the parser reports `needsMapping`, that is returned immediately with
   * no rows and the duplicate detector is not called. Otherwise each row takes
   * the detector's status/confidence, falling back to `'new'` and the parser's
   * own confidence when the detector has no entry for its `sourceIndex`.
   *
   * @throws BadRequestException for an empty upload, an unrecognized format,
   *   or any failure inside the chosen parser.
   * @throws NotFoundException when no account matches both id and user.
   */
  async parse(
    userId: string,
    file: ParserFileInput,
    accountId: string,
    mapping?: ColumnMapping,
  ): Promise<ParseStatementResponse> {
    const hasContent = Boolean(file?.buffer?.length);
    if (!hasContent) {
      throw new BadRequestException('No file uploaded');
    }

    const account = await this.prisma.account.findFirst({
      where: { id: accountId, userId },
      select: { id: true, name: true, type: true },
    });
    if (!account) {
      throw new NotFoundException('Account not found');
    }

    const parser = this.pickParser(file);
    const parsed = await this.runParser(parser, file, { account, mapping });
    const { format } = parser;

    if (parsed.needsMapping) {
      return {
        format,
        account,
        rows: [],
        warnings: parsed.warnings,
        needsMapping: parsed.needsMapping,
      };
    }

    const classifications = await this.duplicateDetector.classify(
      userId,
      accountId,
      parsed.rows,
    );

    const rows: ParsedTransactionResponse[] = [];
    for (const row of parsed.rows) {
      const verdict = classifications.get(row.sourceIndex);
      // Fields are copied one by one so parser-only data such as `rawMemo`
      // stays out of the response.
      rows.push({
        sourceIndex: row.sourceIndex,
        date: row.date,
        amount: row.amount,
        type: row.type,
        description: row.description,
        externalId: row.externalId,
        status: verdict?.status ?? 'new',
        confidence: verdict?.confidence ?? row.confidence,
        duplicateOf: verdict?.duplicateOf,
        transferCandidate: verdict?.transferCandidate,
      });
    }

    return { format, account, rows, warnings: parsed.warnings };
  }

  /** Returns the first registered parser that accepts the file. */
  private pickParser(file: ParserFileInput): StatementParser {
    for (const candidate of this.parsers) {
      if (candidate.canParse(file)) return candidate;
    }
    throw new BadRequestException(
      'Unsupported file format. Please upload a CSV, OFX, QFX, or PDF statement.',
    );
  }

  /** Runs the parser, converting anything it throws into a BadRequestException. */
  private async runParser(
    parser: StatementParser,
    file: ParserFileInput,
    options: Parameters<StatementParser['parse']>[1],
  ): Promise<ParseResult> {
    try {
      return await parser.parse(file, options);
    } catch (err) {
      const message =
        err instanceof Error
          ? err.message
          : 'Unable to read this statement file.';
      throw new BadRequestException(message);
    }
  }
}
b/tehriehlbudget-backend/src/transactions/transactions.controller.spec.ts @@ -39,6 +39,9 @@ describe('TransactionsController', () => { const mockService = { create: jest.fn().mockResolvedValue(mockTransaction), + createMany: jest + .fn() + .mockResolvedValue({ created: 2, ids: ['txn-1', 'txn-2'] }), findAll: jest.fn().mockResolvedValue({ data: [mockTransaction], total: 1, @@ -106,4 +109,36 @@ describe('TransactionsController', () => { expect(mockService.remove).toHaveBeenCalledWith('user-123', 'txn-1'); expect(result).toEqual(mockTransaction); }); + + it('forwards bulk imports to the service with source metadata', async () => { + const dto = { + transactions: [ + { + accountId: 'acc-1', + amount: 10, + type: TransactionType.EXPENSE, + description: 'A', + date: '2026-04-01', + }, + { + accountId: 'acc-1', + amount: 20, + type: TransactionType.EXPENSE, + description: 'B', + date: '2026-04-02', + }, + ], + source: 'statement-import', + sourceLabel: 'chase-2026-04.csv', + } as any; + + const result = await controller.bulk(mockUser, dto); + + expect(mockService.createMany).toHaveBeenCalledWith( + 'user-123', + dto.transactions, + { source: 'statement-import', sourceLabel: 'chase-2026-04.csv' }, + ); + expect(result.created).toBe(2); + }); }); diff --git a/tehriehlbudget-backend/src/transactions/transactions.controller.ts b/tehriehlbudget-backend/src/transactions/transactions.controller.ts index a67d8b8..0625acc 100644 --- a/tehriehlbudget-backend/src/transactions/transactions.controller.ts +++ b/tehriehlbudget-backend/src/transactions/transactions.controller.ts @@ -13,6 +13,7 @@ import { TransactionsService } from './transactions.service'; import type { TransactionFilters } from './transactions.service'; import { CreateTransactionDto } from './dto/create-transaction.dto'; import { UpdateTransactionDto } from './dto/update-transaction.dto'; +import { BulkCreateTransactionsDto } from './dto/bulk-create-transactions.dto'; import { AuthGuard } from '../auth/auth.guard'; 
import { CurrentUser } from '../auth/user.decorator'; import type { User } from '@prisma/client'; @@ -27,6 +28,17 @@ export class TransactionsController { return this.transactionsService.create(user.id, dto); } + @Post('bulk') + bulk( + @CurrentUser() user: User, + @Body() dto: BulkCreateTransactionsDto, + ) { + return this.transactionsService.createMany(user.id, dto.transactions, { + source: dto.source, + sourceLabel: dto.sourceLabel, + }); + } + @Get() findAll(@CurrentUser() user: User, @Query() filters: TransactionFilters) { return this.transactionsService.findAll(user.id, filters); diff --git a/tehriehlbudget-backend/src/transactions/transactions.service.spec.ts b/tehriehlbudget-backend/src/transactions/transactions.service.spec.ts index 4774c27..0ca6b93 100644 --- a/tehriehlbudget-backend/src/transactions/transactions.service.spec.ts +++ b/tehriehlbudget-backend/src/transactions/transactions.service.spec.ts @@ -714,4 +714,242 @@ describe('TransactionsService', () => { expect(call.take).toBe(10000); }); }); + + describe('createMany', () => { + const incomeDto = (over: Partial = {}) => ({ + accountId: 'acc-1', + amount: 100, + type: TransactionType.INCOME, + description: 'Paycheck', + date: '2026-04-01', + ...over, + }); + const expenseDto = (over: Partial = {}) => ({ + accountId: 'acc-1', + amount: 25, + type: TransactionType.EXPENSE, + description: 'Coffee', + date: '2026-04-02', + ...over, + }); + + it('creates every row and returns the new ids', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + let n = 0; + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + id: `txn-${++n}`, + })); + + const result = await service.createMany(userId, [ + incomeDto({ description: 'A' }), + expenseDto({ description: 'B' }), + expenseDto({ description: 'C' }), + ]); + + expect(result.created).toBe(3); + expect(result.ids).toEqual(['txn-1', 'txn-2', 'txn-3']); + 
expect(txClient.transaction.create).toHaveBeenCalledTimes(3); + }); + + it('applies balance deltas to each account, asset and liability mixed', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + { id: 'acc-cc', type: AccountType.CREDIT }, + ]); + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + })); + + await service.createMany(userId, [ + incomeDto({ accountId: 'acc-1', amount: 500 }), + expenseDto({ accountId: 'acc-1', amount: 30 }), + expenseDto({ accountId: 'acc-cc', amount: 70 }), + ]); + + // Asset +500 INCOME, -30 EXPENSE + expect(txClient.account.update).toHaveBeenCalledWith({ + where: { id: 'acc-1' }, + data: { balance: { increment: 500 } }, + }); + expect(txClient.account.update).toHaveBeenCalledWith({ + where: { id: 'acc-1' }, + data: { balance: { decrement: 30 } }, + }); + // Liability +70 EXPENSE (debt grows) + expect(txClient.account.update).toHaveBeenCalledWith({ + where: { id: 'acc-cc' }, + data: { balance: { increment: 70 } }, + }); + }); + + it('rejects when any row references an account not owned by the user', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + await expect( + service.createMany(userId, [ + incomeDto(), + expenseDto({ accountId: 'stranger' }), + ]), + ).rejects.toThrow(NotFoundException); + }); + + it('validates transfer rows require a destination', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + await expect( + service.createMany(userId, [ + { ...incomeDto(), type: TransactionType.TRANSFER } as any, + ]), + ).rejects.toThrow(BadRequestException); + }); + + it('handles TRANSFER rows updating both source and destination', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + { id: 'acc-2', type: AccountType.SAVINGS }, + ]); + 
txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + accountId: 'acc-1', + destinationAccountId: 'acc-2', + amount: 200, + type: TransactionType.TRANSFER, + })); + + await service.createMany(userId, [ + { + accountId: 'acc-1', + destinationAccountId: 'acc-2', + amount: 200, + type: TransactionType.TRANSFER, + description: 'Move', + date: '2026-04-03', + } as any, + ]); + + expect(txClient.account.update).toHaveBeenCalledWith({ + where: { id: 'acc-1' }, + data: { balance: { decrement: 200 } }, + }); + expect(txClient.account.update).toHaveBeenCalledWith({ + where: { id: 'acc-2' }, + data: { balance: { increment: 200 } }, + }); + }); + + it('chunks into batches of 50 to keep $transaction calls bounded', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + })); + + const rows = Array.from({ length: 120 }, () => expenseDto()); + await service.createMany(userId, rows); + + // 120 rows / 50 per chunk = 3 chunks (50 + 50 + 20) + expect(mockPrisma.$transaction).toHaveBeenCalledTimes(3); + }); + + it('writes one ActivityLog entry per chunk, not per transaction', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + })); + + const rows = Array.from({ length: 60 }, () => expenseDto()); + await service.createMany( + userId, + rows, + { source: 'statement-import', sourceLabel: 'chase.csv' }, + ); + + // 60 rows = 2 chunks, expect 2 log entries (not 60) + expect(mockActivityLog.log).toHaveBeenCalledTimes(2); + expect(mockActivityLog.log).toHaveBeenCalledWith( + expect.objectContaining({ + userId, + entityType: 'TRANSACTION', + action: 'CREATE', + summary: expect.stringMatching(/Imported \d+ transaction.*chase\.csv/), + }), + ); + }); + + it('encrypts notes per row when 
provided', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + })); + + await service.createMany(userId, [ + expenseDto({ notes: 'private' }), + expenseDto({ notes: 'also private' }), + ]); + + expect(mockEncryption.encryptField).toHaveBeenCalledWith('private'); + expect(mockEncryption.encryptField).toHaveBeenCalledWith('also private'); + }); + + it('persists externalId when provided', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + })); + + await service.createMany(userId, [ + expenseDto({ externalId: 'BANK-FITID-1' }), + ]); + + const data = txClient.transaction.create.mock.calls[0][0].data; + expect(data.externalId).toBe('BANK-FITID-1'); + }); + + it('rejects an empty input array', async () => { + await expect(service.createMany(userId, [])).rejects.toThrow( + BadRequestException, + ); + }); + + it('reports actual created count when a chunk fails partway', async () => { + mockPrisma.account.findMany.mockResolvedValue([ + { id: 'acc-1', type: AccountType.CHECKING }, + ]); + let chunkIndex = 0; + mockPrisma.$transaction = jest.fn(async (cb: any) => { + // Second chunk throws to simulate partial failure across chunks. + chunkIndex += 1; + if (chunkIndex === 2) { + throw new Error('boom'); + } + return cb(txClient); + }); + txClient.transaction.create.mockImplementation(async () => ({ + ...baseTxn, + id: 'txn-x', + })); + + const rows = Array.from({ length: 60 }, () => expenseDto()); + const result = await service.createMany(userId, rows); + // First chunk (50) succeeded; second chunk (10) threw and is discarded. 
+ expect(result.created).toBe(50); + expect(result.partial).toEqual({ + attempted: 60, + failed: 10, + error: expect.stringContaining('boom'), + }); + }); + }); }); diff --git a/tehriehlbudget-backend/src/transactions/transactions.service.ts b/tehriehlbudget-backend/src/transactions/transactions.service.ts index d641675..eecf885 100644 --- a/tehriehlbudget-backend/src/transactions/transactions.service.ts +++ b/tehriehlbudget-backend/src/transactions/transactions.service.ts @@ -17,6 +17,20 @@ import { } from '@prisma/client'; export const EXPORT_ROW_CAP = 10000; +export const BULK_CHUNK_SIZE = 50; +export const BULK_TX_TIMEOUT_MS = 15_000; +export const BULK_TX_MAX_WAIT_MS = 5_000; + +export interface BulkSource { + source?: string; + sourceLabel?: string; +} + +export interface BulkCreateResult { + created: number; + ids: string[]; + partial?: { attempted: number; failed: number; error: string }; +} export interface TransactionFilters { accountId?: string; @@ -213,6 +227,147 @@ export class TransactionsService { return this.decryptTransaction(txn); } + async createMany( + userId: string, + dtos: CreateTransactionDto[], + source: BulkSource = {}, + ): Promise { + if (!dtos || dtos.length === 0) { + throw new BadRequestException('At least one transaction is required'); + } + + const accountIds = new Set(); + for (const dto of dtos) { + accountIds.add(dto.accountId); + if (dto.type === TransactionType.TRANSFER) { + if (!dto.destinationAccountId) { + throw new BadRequestException( + 'destinationAccountId is required for TRANSFER transactions', + ); + } + if (dto.destinationAccountId === dto.accountId) { + throw new BadRequestException( + 'Source and destination accounts must differ', + ); + } + accountIds.add(dto.destinationAccountId); + } + } + + const accounts = await this.prisma.account.findMany({ + where: { userId, id: { in: Array.from(accountIds) } }, + select: { id: true, type: true }, + }); + if (accounts.length !== accountIds.size) { + throw new 
NotFoundException('One or more accounts not found'); + } + const typeById = new Map(accounts.map((a) => [a.id, a.type])); + + const allIds: string[] = []; + let partial: BulkCreateResult['partial']; + + for (let i = 0; i < dtos.length; i += BULK_CHUNK_SIZE) { + const chunk = dtos.slice(i, i + BULK_CHUNK_SIZE); + try { + const chunkIds = await this.prisma.$transaction( + async (tx) => { + const ids: string[] = []; + for (const dto of chunk) { + const created = await this.applyTransactionWithinTx( + tx, + userId, + dto, + typeById, + ); + ids.push(created.id); + } + await this.activityLog.log({ + userId, + entityType: EntityType.TRANSACTION, + entityId: ids[0], + action: ActivityAction.CREATE, + summary: this.bulkImportSummary(ids.length, source), + snapshot: { + count: ids.length, + ids, + source: source.source ?? 'bulk', + label: source.sourceLabel ?? null, + }, + tx, + }); + return ids; + }, + { timeout: BULK_TX_TIMEOUT_MS, maxWait: BULK_TX_MAX_WAIT_MS }, + ); + allIds.push(...chunkIds); + } catch (err) { + const remaining = dtos.length - i; + partial = { + attempted: dtos.length, + failed: remaining, + error: err instanceof Error ? err.message : String(err), + }; + break; + } + } + + return partial + ? 
{ created: allIds.length, ids: allIds, partial } + : { created: allIds.length, ids: allIds }; + } + + private async applyTransactionWithinTx( + tx: Prisma.TransactionClient, + userId: string, + dto: CreateTransactionDto, + typeById: Map, + ): Promise<{ id: string }> { + const data: Prisma.TransactionUncheckedCreateInput = { + ...dto, + userId, + date: parseDateInput(dto.date), + }; + if (data.notes) { + data.notes = this.encryption.encryptField(data.notes); + } + if (dto.type !== TransactionType.TRANSFER) { + data.destinationAccountId = null; + } + + const created = await tx.transaction.create({ data }); + + const primaryType = typeById.get(dto.accountId)!; + await tx.account.update({ + where: { id: dto.accountId }, + data: { + balance: asPrismaUpdate( + signedDelta(primaryType, 'primary', dto.type, dto.amount), + ), + }, + }); + if (dto.type === TransactionType.TRANSFER && dto.destinationAccountId) { + const destType = typeById.get(dto.destinationAccountId)!; + await tx.account.update({ + where: { id: dto.destinationAccountId }, + data: { + balance: asPrismaUpdate( + signedDelta(destType, 'destination', dto.type, dto.amount), + ), + }, + }); + } + + return { id: created.id }; + } + + private bulkImportSummary(count: number, source: BulkSource): string { + const noun = count === 1 ? 
'transaction' : 'transactions'; + if (source.sourceLabel) { + return `Imported ${count} ${noun} from ${source.sourceLabel}`; + } + return `Imported ${count} ${noun}`; + } + async findAll(userId: string, filters: TransactionFilters) { const { accountId, categoryId, type, startDate, endDate } = filters; const all = filters.all === true || filters.all === 'true'; diff --git a/tehriehlbudget-backend/test/fixtures/statements/amex-credit.csv b/tehriehlbudget-backend/test/fixtures/statements/amex-credit.csv new file mode 100644 index 0000000..693399a --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/amex-credit.csv @@ -0,0 +1,5 @@ +Date,Description,Amount +2026-04-02,RESTAURANT XYZ,52.10 +2026-04-03,GROCERY OUTLET,89.45 +2026-04-04,AUTOPAY PAYMENT - THANK YOU,-450.00 +2026-04-05,GAS STATION 24,40.00 diff --git a/tehriehlbudget-backend/test/fixtures/statements/boa-debit-credit.csv b/tehriehlbudget-backend/test/fixtures/statements/boa-debit-credit.csv new file mode 100644 index 0000000..06ba813 --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/boa-debit-credit.csv @@ -0,0 +1,5 @@ +Date,Description,Debit,Credit +2026-04-02,Whole Foods Market,82.41, +2026-04-03,Refund - Returned Item,,15.00 +2026-04-04,Gas Station #12,38.20, +2026-04-05,Interest Earned,,3.42 diff --git a/tehriehlbudget-backend/test/fixtures/statements/chase-signed.csv b/tehriehlbudget-backend/test/fixtures/statements/chase-signed.csv new file mode 100644 index 0000000..9b10c0b --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/chase-signed.csv @@ -0,0 +1,5 @@ +Posting Date,Description,Amount,Type,Balance +04/02/2026,"AMZN MKTP US*ABC123",-42.10,Sale,1957.90 +04/03/2026,"DIRECT DEPOSIT PAYROLL",2500.00,Direct Deposit,4457.90 +04/04/2026,"STARBUCKS #4321",-6.75,Sale,4451.15 +04/05/2026,"VENMO PAYMENT",-150.00,Misc Debit,4301.15 diff --git a/tehriehlbudget-backend/test/fixtures/statements/generic-unknown-headers.csv 
b/tehriehlbudget-backend/test/fixtures/statements/generic-unknown-headers.csv new file mode 100644 index 0000000..7dcd75d --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/generic-unknown-headers.csv @@ -0,0 +1,4 @@ +When,What,How Much,Direction +2026-04-02,Coffee Shop,4.50,out +2026-04-03,Side Gig,200.00,in +2026-04-04,Bookstore,28.75,out diff --git a/tehriehlbudget-backend/test/fixtures/statements/sample-credit-card.qfx b/tehriehlbudget-backend/test/fixtures/statements/sample-credit-card.qfx new file mode 100644 index 0000000..4c57492 --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/sample-credit-card.qfx @@ -0,0 +1,60 @@ +OFXHEADER:100 +DATA:OFXSGML +VERSION:102 +SECURITY:NONE +ENCODING:USASCII +CHARSET:1252 +COMPRESSION:NONE +OLDFILEUID:NONE +NEWFILEUID:NONE + + + + + +0 +INFO + +20260415120000 +ENG + +Some Bank +10898 + +10898 + + + + +1 + +0 +INFO + + +USD + +4111111111111111 + + +20260401 +20260430 + +DEBIT +20260402 +-52.10 +CC-001 +RESTAURANT XYZ + + +CREDIT +20260415 +450.00 +CC-002 +AUTOPAY PAYMENT - THANK YOU + + + + + + diff --git a/tehriehlbudget-backend/test/fixtures/statements/sample-v1.ofx b/tehriehlbudget-backend/test/fixtures/statements/sample-v1.ofx new file mode 100644 index 0000000..88485b7 --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/sample-v1.ofx @@ -0,0 +1,69 @@ +OFXHEADER:100 +DATA:OFXSGML +VERSION:102 +SECURITY:NONE +ENCODING:USASCII +CHARSET:1252 +COMPRESSION:NONE +OLDFILEUID:NONE +NEWFILEUID:NONE + + + + + +0 +INFO + +20260415120000 +ENG + + + + +1 + +0 +INFO + + +USD + +123456789 +987654321 +CHECKING + + +20260401 +20260430 + +DEBIT +20260402 +-42.10 +20260402-001 +AMZN MKTP US*ABC123 + + +CREDIT +20260403 +2500.00 +20260403-001 +DIRECT DEPOSIT PAYROLL + + +DEBIT +20260404 +-6.75 +20260404-001 +STARBUCKS #4321 +Coffee + + + +4301.15 +20260430 + + + + + diff --git a/tehriehlbudget-backend/test/fixtures/statements/sample-v2.ofx 
b/tehriehlbudget-backend/test/fixtures/statements/sample-v2.ofx new file mode 100644 index 0000000..77eb091 --- /dev/null +++ b/tehriehlbudget-backend/test/fixtures/statements/sample-v2.ofx @@ -0,0 +1,53 @@ + + + + + + + 0 + INFO + + 20260415120000 + ENG + + + + + 1 + + 0 + INFO + + + USD + + 123 + 456 + SAVINGS + + + 20260401 + 20260430 + + CREDIT + 20260405 + 3.42 + INT-001 + Interest Earned + + + DEBIT + 20260410 + -500.00 + XFER-001 + Transfer to checking + + + + 5000.00 + 20260430 + + + + + diff --git a/tehriehlbudget-frontend/package.json b/tehriehlbudget-frontend/package.json index 3468a9e..37ce7db 100644 --- a/tehriehlbudget-frontend/package.json +++ b/tehriehlbudget-frontend/package.json @@ -1,7 +1,7 @@ { "name": "tehriehlbudget-frontend", "private": true, - "version": "0.3.2", + "version": "0.4.0", "type": "module", "scripts": { "dev": "vite", diff --git a/tehriehlbudget-frontend/src/components/ImportStatementDialog.test.tsx b/tehriehlbudget-frontend/src/components/ImportStatementDialog.test.tsx new file mode 100644 index 0000000..c9169f2 --- /dev/null +++ b/tehriehlbudget-frontend/src/components/ImportStatementDialog.test.tsx @@ -0,0 +1,299 @@ +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { + ImportStatementDialog, + type ParseResponse, +} from './ImportStatementDialog'; + +const bulkCreateTransactions = vi.fn(); +const fetchTransactions = vi.fn(); +const fetchAccounts = vi.fn(); + +const accounts = [ + { + id: 'acc-1', + userId: 'u', + name: 'Chase Checking', + type: 'CHECKING' as const, + balance: 1000, + createdAt: '', + updatedAt: '', + }, + { + id: 'acc-2', + userId: 'u', + name: 'Savings', + type: 'SAVINGS' as const, + balance: 5000, + createdAt: '', + updatedAt: '', + }, +]; + +vi.mock('@/stores/transactions', () => ({ + useTransactionsStore: () => ({ + bulkCreateTransactions, + fetchTransactions, + }), +})); + +vi.mock('@/stores/accounts', () 
=> ({ + useAccountsStore: () => ({ + accounts, + fetchAccounts, + }), +})); + +vi.mock('@/lib/supabase', () => ({ + supabase: { + auth: { + getSession: async () => ({ + data: { session: { access_token: 'fake-token' } }, + }), + }, + }, +})); + +vi.mock('@/lib/runtime-config', () => ({ + getConfig: () => 'http://api.test', +})); + +function newRowResponse(over: any = {}) { + return { + sourceIndex: 0, + date: '2026-04-10', + amount: 42.1, + type: 'EXPENSE', + description: 'Coffee', + confidence: 0.95, + status: 'new', + ...over, + }; +} + +function mockParseResponse(rows: any[], extra: Partial = {}) { + return { + format: 'csv' as const, + account: { id: 'acc-1', name: 'Chase Checking', type: 'CHECKING' }, + rows, + warnings: [], + ...extra, + }; +} + +function mockFetchOnce(payload: any, ok = true) { + (globalThis as any).fetch = vi.fn(async () => ({ + ok, + status: ok ? 200 : 400, + json: async () => payload, + })); +} + +describe('ImportStatementDialog', () => { + beforeEach(() => { + bulkCreateTransactions.mockReset(); + bulkCreateTransactions.mockResolvedValue({ + created: 1, + ids: ['new-1'], + }); + fetchTransactions.mockReset(); + fetchTransactions.mockResolvedValue(undefined); + fetchAccounts.mockReset(); + }); + + function open(extras: any = {}) { + return render( + , + ); + } + + function chooseFile(name = 'test.csv') { + const input = document.querySelector( + 'input[type="file"]', + ) as HTMLInputElement; + const file = new File(['Date,Amount\n2026-04-01,10\n'], name, { + type: 'text/csv', + }); + fireEvent.change(input, { target: { files: [file] } }); + return file; + } + + it('renders the upload step initially', () => { + open(); + expect( + screen.getByText(/Upload a CSV, OFX, QFX, or PDF/i), + ).toBeInTheDocument(); + expect(screen.getByRole('button', { name: /continue/i })).toBeDisabled(); + }); + + it('enables Continue once a file is chosen', () => { + open(); + chooseFile(); + 
expect(screen.getByTestId('selected-file')).toHaveTextContent('test.csv'); + expect(screen.getByRole('button', { name: /continue/i })).not.toBeDisabled(); + }); + + it('after parsing a clean statement, advances to Review with rows', async () => { + mockFetchOnce( + mockParseResponse([ + newRowResponse({ sourceIndex: 0, description: 'Coffee', status: 'new' }), + newRowResponse({ + sourceIndex: 1, + description: 'Payday', + type: 'INCOME', + amount: 2000, + status: 'new', + }), + ]), + ); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /continue/i })); + + await waitFor(() => + expect(screen.getByText(/Review each row/i)).toBeInTheDocument(), + ); + expect(screen.getAllByRole('row').length).toBeGreaterThan(1); + expect(screen.getByRole('button', { name: /Continue to confirm/i })).toBeInTheDocument(); + }); + + it('shows the Confirm step with the count in the primary button label', async () => { + mockFetchOnce( + mockParseResponse([ + newRowResponse({ sourceIndex: 0 }), + newRowResponse({ sourceIndex: 1, description: 'B' }), + ]), + ); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /^continue$/i })); + await waitFor(() => + expect(screen.getByText(/Review each row/i)).toBeInTheDocument(), + ); + + fireEvent.click(screen.getByRole('button', { name: /Continue to confirm/i })); + + expect( + screen.getByRole('button', { name: /Import 2 transactions/i }), + ).toBeInTheDocument(); + expect( + screen.getByRole('button', { name: /Back to review/i }), + ).toBeInTheDocument(); + }); + + it('Back to review preserves the row selection state', async () => { + mockFetchOnce( + mockParseResponse([ + newRowResponse({ sourceIndex: 0, description: 'Coffee' }), + newRowResponse({ sourceIndex: 1, description: 'Lunch' }), + ]), + ); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /^continue$/i })); + await waitFor(() => + expect(screen.getByText(/Review each row/i)).toBeInTheDocument(), + ); + 
// Uncheck the first row. + const checkboxes = screen.getAllByRole('checkbox'); + expect(checkboxes[0]).toBeChecked(); + fireEvent.click(checkboxes[0]); + expect(checkboxes[0]).not.toBeChecked(); + + fireEvent.click(screen.getByRole('button', { name: /Continue to confirm/i })); + expect( + screen.getByRole('button', { name: /Import 1 transaction/i }), + ).toBeInTheDocument(); + + fireEvent.click(screen.getByRole('button', { name: /Back to review/i })); + const recheckboxes = screen.getAllByRole('checkbox'); + expect(recheckboxes[0]).not.toBeChecked(); + expect(recheckboxes[1]).toBeChecked(); + }); + + it('does NOT call bulkCreateTransactions until Confirm is clicked', async () => { + mockFetchOnce( + mockParseResponse([newRowResponse({ sourceIndex: 0 })]), + ); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /^continue$/i })); + await waitFor(() => + expect(screen.getByText(/Review each row/i)).toBeInTheDocument(), + ); + fireEvent.click(screen.getByRole('button', { name: /Continue to confirm/i })); + expect(bulkCreateTransactions).not.toHaveBeenCalled(); + + fireEvent.click(screen.getByRole('button', { name: /Import 1 transaction/i })); + await waitFor(() => expect(bulkCreateTransactions).toHaveBeenCalled()); + const [rows, source] = bulkCreateTransactions.mock.calls[0]; + expect(rows).toHaveLength(1); + expect(source.kind).toBe('statement-import'); + }); + + it('defaults duplicate rows to unchecked', async () => { + mockFetchOnce( + mockParseResponse([ + newRowResponse({ + sourceIndex: 0, + status: 'duplicate', + duplicateOf: { + id: 'existing', + date: '2026-04-10', + amount: 42.1, + description: 'Coffee', + }, + }), + newRowResponse({ sourceIndex: 1, description: 'Lunch' }), + ]), + ); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /^continue$/i })); + await waitFor(() => + expect(screen.getByText(/Review each row/i)).toBeInTheDocument(), + ); + const checkboxes = screen.getAllByRole('checkbox'); + 
expect(checkboxes[0]).not.toBeChecked(); + expect(checkboxes[1]).toBeChecked(); + }); + + it('shows the Column Mapping step when the backend asks for one', async () => { + mockFetchOnce( + mockParseResponse([], { + needsMapping: { + headers: ['Col1', 'Col2', 'Col3'], + sample: [['1', '2', '3']], + guess: {}, + }, + }), + ); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /^continue$/i })); + await waitFor(() => + expect( + screen.getByText(/Tell us which field each column/i), + ).toBeInTheDocument(), + ); + // Headers are listed as table cells + expect(screen.getByText('Col1')).toBeInTheDocument(); + expect(screen.getByText('Col2')).toBeInTheDocument(); + expect(screen.getByText('Col3')).toBeInTheDocument(); + }); + + it('shows a parse error inline', async () => { + mockFetchOnce({ message: 'This file is corrupt' }, false); + open(); + chooseFile(); + fireEvent.click(screen.getByRole('button', { name: /^continue$/i })); + await waitFor(() => + expect(screen.getByText('This file is corrupt')).toBeInTheDocument(), + ); + }); +}); diff --git a/tehriehlbudget-frontend/src/components/ImportStatementDialog.tsx b/tehriehlbudget-frontend/src/components/ImportStatementDialog.tsx new file mode 100644 index 0000000..1d536d4 --- /dev/null +++ b/tehriehlbudget-frontend/src/components/ImportStatementDialog.tsx @@ -0,0 +1,995 @@ +import { useEffect, useMemo, useState } from 'react'; +import { + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; +import { Badge } from '@/components/ui/badge'; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; +import { Upload, ArrowRight } from 'lucide-react'; +import { supabase } from 
'@/lib/supabase'; +import { getConfig } from '@/lib/runtime-config'; +import { formatDate, toDateInputValue } from '@/lib/dates'; +import { useTransactionsStore, type BulkCreateInput } from '@/stores/transactions'; +import { useAccountsStore } from '@/stores/accounts'; +import type { Account } from '@/stores/accounts'; + +type RowStatus = 'new' | 'duplicate' | 'needs_review' | 'possible_transfer'; +type RowType = 'INCOME' | 'EXPENSE' | 'TRANSFER'; + +export interface DuplicateMatch { + id: string; + date: string; + amount: number; + description: string; +} + +export interface TransferCandidate { + accountId: string; + accountName: string; + matchedTransactionId: string; +} + +export interface ParsedRowResponse { + sourceIndex: number; + date: string; + amount: number; + type: 'INCOME' | 'EXPENSE'; + description: string; + externalId?: string; + status: RowStatus; + confidence: number; + duplicateOf?: DuplicateMatch; + transferCandidate?: TransferCandidate; +} + +export interface ColumnMapping { + date?: string; + description?: string; + amount?: string; + debit?: string; + credit?: string; + type?: string; +} + +export interface ParseResponse { + format: 'csv' | 'ofx' | 'pdf'; + account: { id: string; name: string; type: string }; + rows: ParsedRowResponse[]; + warnings: string[]; + needsMapping?: { + headers: string[]; + sample: string[][]; + guess: ColumnMapping; + }; +} + +interface ReviewRow { + sourceIndex: number; + date: string; + amount: number; + type: RowType; + description: string; + externalId?: string; + status: RowStatus; + duplicateOf?: DuplicateMatch; + transferCandidate?: TransferCandidate; + destinationAccountId?: string; + included: boolean; +} + +type Step = 'upload' | 'mapping' | 'review' | 'confirm' | 'submitting'; + +interface Props { + open: boolean; + onOpenChange: (open: boolean) => void; + defaultAccountId?: string; + onImported?: (counts: { created: number; skipped: number }) => void; +} + +const MAPPING_FIELDS: { value: keyof 
ColumnMapping | 'ignore'; label: string }[] = [ + { value: 'date', label: 'Date' }, + { value: 'description', label: 'Description' }, + { value: 'amount', label: 'Amount (signed)' }, + { value: 'debit', label: 'Debit (withdrawals)' }, + { value: 'credit', label: 'Credit (deposits)' }, + { value: 'type', label: 'Type (Dr/Cr or income/expense)' }, + { value: 'ignore', label: 'Ignore' }, +]; + +function currency(n: number): string { + return `$${Number(n).toLocaleString('en-US', { + minimumFractionDigits: 2, + maximumFractionDigits: 2, + })}`; +} + +function statusBadge(status: RowStatus) { + switch (status) { + case 'duplicate': + return ( + + Duplicate + + ); + case 'needs_review': + return ( + + Needs review + + ); + case 'possible_transfer': + return ( + + Possible transfer + + ); + default: + return ( + + New + + ); + } +} + +function defaultIncluded(status: RowStatus): boolean { + if (status === 'duplicate') return false; + if (status === 'needs_review') return false; + return true; +} + +async function uploadAndParse( + file: File, + accountId: string, + mapping?: ColumnMapping, +): Promise { + const { data: session } = await supabase.auth.getSession(); + const token = session.session?.access_token; + const apiUrl = getConfig('VITE_API_URL') || 'http://localhost:3000'; + const fd = new FormData(); + fd.append('file', file); + fd.append('accountId', accountId); + if (mapping) fd.append('mapping', JSON.stringify(mapping)); + const res = await fetch(`${apiUrl}/statements/parse`, { + method: 'POST', + headers: token ? 
{ Authorization: `Bearer ${token}` } : {}, + body: fd, + }); + if (!res.ok) { + const err = await res.json().catch(() => ({ message: res.statusText })); + throw new Error(err.message || 'Failed to parse statement'); + } + return res.json(); +} + +export function ImportStatementDialog({ + open, + onOpenChange, + defaultAccountId, + onImported, +}: Props) { + const { accounts, fetchAccounts } = useAccountsStore(); + const { bulkCreateTransactions, fetchTransactions } = useTransactionsStore(); + + const [step, setStep] = useState('upload'); + const [file, setFile] = useState(null); + const [accountId, setAccountId] = useState(defaultAccountId ?? ''); + const [parseResult, setParseResult] = useState(null); + const [mapping, setMapping] = useState({}); + const [rows, setRows] = useState([]); + const [error, setError] = useState(null); + const [working, setWorking] = useState(false); + + useEffect(() => { + if (open) { + fetchAccounts(); + setStep('upload'); + setFile(null); + setAccountId(defaultAccountId ?? 
''); + setParseResult(null); + setMapping({}); + setRows([]); + setError(null); + setWorking(false); + } + }, [open, defaultAccountId, fetchAccounts]); + + const selectedAccount = accounts.find((a) => a.id === accountId); + + const handleParse = async (currentMapping?: ColumnMapping) => { + if (!file || !accountId) return; + setError(null); + setWorking(true); + try { + const result = await uploadAndParse(file, accountId, currentMapping); + setParseResult(result); + if (result.needsMapping) { + setMapping(result.needsMapping.guess); + setStep('mapping'); + } else { + const reviewRows: ReviewRow[] = result.rows.map((r) => ({ + sourceIndex: r.sourceIndex, + date: r.date, + amount: r.amount, + type: r.type as RowType, + description: r.description, + externalId: r.externalId, + status: r.status, + duplicateOf: r.duplicateOf, + transferCandidate: r.transferCandidate, + included: defaultIncluded(r.status), + })); + setRows(reviewRows); + setStep('review'); + } + } catch (e) { + setError(e instanceof Error ? e.message : 'Could not parse this statement'); + } finally { + setWorking(false); + } + }; + + const handleMappingSubmit = async () => { + await handleParse(mapping); + }; + + const updateRow = (sourceIndex: number, patch: Partial) => { + setRows((prev) => + prev.map((r) => (r.sourceIndex === sourceIndex ? { ...r, ...patch } : r)), + ); + }; + + const markAllIncluded = (included: boolean) => + setRows((prev) => prev.map((r) => ({ ...r, included }))); + const skipAllDuplicates = () => + setRows((prev) => + prev.map((r) => (r.status === 'duplicate' ? 
{ ...r, included: false } : r)), + ); + + const selected = rows.filter((r) => r.included); + const duplicates = rows.filter((r) => r.status === 'duplicate'); + const needsReview = rows.filter((r) => r.status === 'needs_review'); + const transfers = rows.filter((r) => r.type === 'TRANSFER' || r.status === 'possible_transfer'); + + const balanceDelta = useMemo(() => { + const byAccount = new Map< + string, + { name: string; delta: number; type: Account['type'] } + >(); + for (const row of selected) { + const acc = accounts.find((a) => a.id === accountId); + if (!acc) continue; + const isLiability = acc.type === 'CREDIT' || acc.type === 'LOAN'; + let signed = 0; + if (row.type === 'INCOME') { + signed = isLiability ? -row.amount : row.amount; + } else if (row.type === 'EXPENSE') { + signed = isLiability ? row.amount : -row.amount; + } else if (row.type === 'TRANSFER') { + // The "source" side of a transfer is what runs through the selected account + signed = isLiability ? row.amount : -row.amount; + } + const existing = byAccount.get(acc.id); + byAccount.set(acc.id, { + name: acc.name, + type: acc.type, + delta: (existing?.delta ?? 0) + signed, + }); + if (row.type === 'TRANSFER' && row.destinationAccountId) { + const destAcc = accounts.find((a) => a.id === row.destinationAccountId); + if (destAcc) { + const destIsLiab = destAcc.type === 'CREDIT' || destAcc.type === 'LOAN'; + const destDelta = destIsLiab ? -row.amount : row.amount; + const destExisting = byAccount.get(destAcc.id); + byAccount.set(destAcc.id, { + name: destAcc.name, + type: destAcc.type, + delta: (destExisting?.delta ?? 
0) + destDelta, + }); + } + } + } + return byAccount; + }, [selected, accounts]); + + const handleConfirm = async () => { + setStep('submitting'); + setError(null); + try { + const payload: BulkCreateInput[] = selected.map((r) => { + const row: BulkCreateInput = { + accountId, + amount: r.amount, + type: r.type, + description: r.description, + date: r.date, + }; + if (r.externalId) row.externalId = r.externalId; + if (r.type === 'TRANSFER' && r.destinationAccountId) { + row.destinationAccountId = r.destinationAccountId; + } + return row; + }); + const result = await bulkCreateTransactions(payload, { + kind: 'statement-import', + label: file?.name ?? 'statement', + }); + onImported?.({ + created: result.created, + skipped: rows.length - selected.length, + }); + await fetchTransactions({}, 1); + onOpenChange(false); + } catch (e) { + setError(e instanceof Error ? e.message : 'Import failed'); + setStep('confirm'); + } + }; + + return ( + + + + Import statement + + + {step === 'upload' && ( + handleParse()} + working={working} + error={error} + /> + )} + + {step === 'mapping' && parseResult?.needsMapping && ( + setStep('upload')} + onSubmit={handleMappingSubmit} + working={working} + error={error} + /> + )} + + {step === 'review' && ( + markAllIncluded(true)} + onSkipDuplicates={skipAllDuplicates} + onClearAll={() => markAllIncluded(false)} + onCancel={() => onOpenChange(false)} + onContinue={() => setStep('confirm')} + warnings={parseResult?.warnings ?? 
[]} + counts={{ + selected: selected.length, + duplicates: duplicates.length, + needsReview: needsReview.length, + possibleTransfers: transfers.length, + }} + /> + )} + + {(step === 'confirm' || step === 'submitting') && ( + setStep('review')} + onConfirm={handleConfirm} + submitting={step === 'submitting'} + error={error} + /> + )} + + + ); +} + +// --- Step renderers --- + +interface UploadStepProps { + accounts: Account[]; + accountId: string; + onAccountChange: (id: string) => void; + file: File | null; + onFileChange: (file: File | null) => void; + onSubmit: () => void; + working: boolean; + error: string | null; +} + +function UploadStep({ + accounts, + accountId, + onAccountChange, + file, + onFileChange, + onSubmit, + working, + error, +}: UploadStepProps) { + const accountName = (id: string | null | undefined) => + id ? (accounts.find((a) => a.id === id)?.name ?? '') : ''; + return ( + <> +
+

+ Upload a CSV, OFX, QFX, or PDF statement to bulk-import its transactions + into one of your accounts. Duplicates against your existing transactions + will be flagged for review. +

+
+ + +
+
+ + {file && ( +

+ {file.name} +

+ )} +
+ {error &&

{error}

} +
+ + + + + ); +} + +interface MappingStepProps { + headers: string[]; + sample: string[][]; + mapping: ColumnMapping; + onMappingChange: (m: ColumnMapping) => void; + onBack: () => void; + onSubmit: () => void; + working: boolean; + error: string | null; +} + +function MappingStep({ + headers, + sample, + mapping, + onMappingChange, + onBack, + onSubmit, + working, + error, +}: MappingStepProps) { + const fieldOfHeader = (h: string): string => { + for (const [k, v] of Object.entries(mapping)) { + if (v === h) return k; + } + return 'ignore'; + }; + const setFieldForHeader = (h: string, field: string) => { + const next: ColumnMapping = { ...mapping }; + // Clear any prior assignment of this field + (Object.keys(next) as (keyof ColumnMapping)[]).forEach((k) => { + if (next[k] === h) delete next[k]; + }); + if (field !== 'ignore') { + // Also un-set if another header was using this field + (Object.keys(next) as (keyof ColumnMapping)[]).forEach((k) => { + if (k === field) delete next[k]; + }); + (next as Record)[field] = h; + } + onMappingChange(next); + }; + + return ( + <> +
+

+ We couldn't auto-detect the columns in this file. Tell us which field + each column represents. +

+
+ + + + Column + Maps to + Sample values + + + + {headers.map((h, i) => ( + + {h} + + + + + {sample.map((row) => row[i]).filter(Boolean).slice(0, 3).join(', ')} + + + ))} + +
+
+ {error &&

{error}

} +
+ + + + + + ); +} + +interface ReviewStepProps { + rows: ReviewRow[]; + accounts: Account[]; + sourceAccount: Account | undefined; + onUpdateRow: (i: number, patch: Partial) => void; + onIncludeAll: () => void; + onSkipDuplicates: () => void; + onClearAll: () => void; + onCancel: () => void; + onContinue: () => void; + warnings: string[]; + counts: { + selected: number; + duplicates: number; + needsReview: number; + possibleTransfers: number; + }; +} + +function ReviewStep({ + rows, + accounts, + sourceAccount, + onUpdateRow, + onIncludeAll, + onSkipDuplicates, + onClearAll, + onCancel, + onContinue, + warnings, + counts, +}: ReviewStepProps) { + const otherAccounts = accounts.filter((a) => a.id !== sourceAccount?.id); + return ( + <> +
+
+

+ Review each row before import. Edit fields inline if needed; uncheck + anything you don't want to import. +

+
+ + + +
+
+ {warnings.length > 0 && ( +
+ {warnings.slice(0, 3).map((w, i) => ( +

{w}

+ ))} + {warnings.length > 3 && ( +

+ And {warnings.length - 3} more — open the developer console for details. +

+ )} +
+ )} +
+ + + + + Date + Description + Amount + Type + Status + + + + {rows.map((r) => ( + + + + onUpdateRow(r.sourceIndex, { included: e.target.checked }) + } + /> + + + + onUpdateRow(r.sourceIndex, { date: e.target.value }) + } + /> + + + + onUpdateRow(r.sourceIndex, { description: e.target.value }) + } + /> + {r.duplicateOf && ( +

+ Matches existing {formatDate(r.duplicateOf.date)} ·{' '} + {currency(r.duplicateOf.amount)} · {r.duplicateOf.description} +

+ )} + {r.transferCandidate && !r.destinationAccountId && ( +

+ Possible transfer with {r.transferCandidate.accountName}. + Select a destination to mark as transfer. +

+ )} +
+ + + onUpdateRow(r.sourceIndex, { + amount: parseFloat(e.target.value) || 0, + }) + } + /> + + + + {r.type === 'TRANSFER' && ( + + )} + + {statusBadge(r.status)} +
+ ))} +
+
+
+

+ {counts.selected} selected · {counts.duplicates} duplicates ·{' '} + {counts.needsReview} need review · {counts.possibleTransfers} possible + transfers +

+
+ + + + + + ); +} + +interface ConfirmStepProps { + counts: { + selected: number; + duplicates: number; + needsReview: number; + transfers: number; + }; + balanceDelta: Map; + sourceAccount: Account | undefined; + selectedRows: ReviewRow[]; + accounts: Account[]; + onBack: () => void; + onConfirm: () => void; + submitting: boolean; + error: string | null; +} + +function ConfirmStep({ + counts, + balanceDelta, + sourceAccount, + selectedRows, + accounts, + onBack, + onConfirm, + submitting, + error, +}: ConfirmStepProps) { + const [expanded, setExpanded] = useState(false); + + return ( + <> +
+
+
+

{counts.selected}

+

To import

+
+
+

{counts.duplicates}

+

Duplicates skipped

+
+
+

{counts.needsReview}

+

Needed review

+
+
+

{counts.transfers}

+

Transfers

+
+
+ + {balanceDelta.size > 0 && ( +
+

+ Projected balance impact +

+
    + {Array.from(balanceDelta.entries()).map(([id, info]) => { + const acc = accounts.find((a) => a.id === id); + const current = Number(acc?.balance ?? 0); + const projected = current + info.delta; + return ( +
  • + {info.name} + + {currency(current)} {' '} + = 0 ? 'text-emerald-700' : 'text-red-700' + } + > + {currency(projected)} + + +
  • + ); + })} +
+
+ )} + + + {expanded && ( +
+ + + + Date + Description + Amount + Type + Account + + + + {selectedRows.map((r) => ( + + {formatDate(r.date)} + {r.description} + + {currency(r.amount)} + + + {r.type.charAt(0) + r.type.slice(1).toLowerCase()} + + + {sourceAccount?.name ?? '—'} + {r.destinationAccountId && ( + <> + {' '} + {' '} + {accounts.find((a) => a.id === r.destinationAccountId) + ?.name ?? '—'} + + )} + + + ))} + +
+
+ )} + + {error &&

{error}

} +
+ + + + + + ); +} diff --git a/tehriehlbudget-frontend/src/pages/Transactions.tsx b/tehriehlbudget-frontend/src/pages/Transactions.tsx index 3f89521..ad0dd76 100644 --- a/tehriehlbudget-frontend/src/pages/Transactions.tsx +++ b/tehriehlbudget-frontend/src/pages/Transactions.tsx @@ -40,12 +40,14 @@ import { Trash2, Paperclip, Pencil, + Upload, ArrowRight, } from 'lucide-react'; import { TransactionForm, type TransactionFormData } from '@/components/TransactionForm'; import { ReceiptViewer } from '@/components/ReceiptViewer'; import { ConfirmDialog } from '@/components/ConfirmDialog'; import { ExportTransactionsDialog } from '@/components/ExportTransactionsDialog'; +import { ImportStatementDialog } from '@/components/ImportStatementDialog'; import { formatDate } from '@/lib/dates'; const TRANSACTION_TYPES = ['INCOME', 'EXPENSE', 'TRANSFER'] as const; @@ -74,6 +76,8 @@ export function Transactions() { const [viewingReceipt, setViewingReceipt] = useState(null); const [deleting, setDeleting] = useState(null); const [exportOpen, setExportOpen] = useState(false); + const [importOpen, setImportOpen] = useState(false); + const [importToast, setImportToast] = useState(null); const [expandedId, setExpandedId] = useState(null); useEffect(() => { @@ -129,6 +133,9 @@ export function Transactions() { + }> Add Transaction @@ -392,6 +399,33 @@ export function Transactions() { setViewingReceipt(null)} /> + { + const skippedNote = skipped > 0 ? ` Skipped ${skipped} duplicate${skipped === 1 ? '' : 's'}.` : ''; + setImportToast(`Imported ${created} transaction${created === 1 ? '' : 's'}.${skippedNote}`); + fetchAccounts(); + }} + /> + + {importToast && ( +
setImportToast(null)} + > + {importToast} + +
+ )} + !open && setDeleting(null)} @@ -422,6 +456,7 @@ export function Transactions() { open={exportOpen} onOpenChange={setExportOpen} baseFilters={filters} + accountName={accountName(filters.accountId)} /> ); diff --git a/tehriehlbudget-frontend/src/stores/transactions.test.ts b/tehriehlbudget-frontend/src/stores/transactions.test.ts index 84a5b58..446eea2 100644 --- a/tehriehlbudget-frontend/src/stores/transactions.test.ts +++ b/tehriehlbudget-frontend/src/stores/transactions.test.ts @@ -97,6 +97,57 @@ describe('useTransactionsStore', () => { expect(txns[1]).toEqual({ id: '2', description: 'other' }); }); + describe('bulkCreateTransactions', () => { + it('posts the rows to /transactions/bulk along with the source metadata', async () => { + mockApi.post.mockResolvedValue({ created: 2, ids: ['a', 'b'] }); + + const rows = [ + { + accountId: 'acc-1', + amount: 5, + type: 'EXPENSE' as const, + description: 'Coffee', + date: '2026-04-10', + }, + { + accountId: 'acc-1', + amount: 200, + type: 'INCOME' as const, + description: 'Refund', + date: '2026-04-11', + }, + ]; + const result = await useTransactionsStore + .getState() + .bulkCreateTransactions(rows, { + kind: 'statement-import', + label: 'chase-2026-04.csv', + }); + + expect(mockApi.post).toHaveBeenCalledWith('/transactions/bulk', { + transactions: rows, + source: 'statement-import', + sourceLabel: 'chase-2026-04.csv', + }); + expect(result).toEqual({ created: 2, ids: ['a', 'b'] }); + }); + + it('propagates a partial-failure result from the backend', async () => { + mockApi.post.mockResolvedValue({ + created: 50, + ids: Array.from({ length: 50 }, (_, i) => `id-${i}`), + partial: { attempted: 60, failed: 10, error: 'timeout' }, + }); + const result = await useTransactionsStore + .getState() + .bulkCreateTransactions([], { + kind: 'statement-import', + label: 'big.csv', + }); + expect(result.partial?.failed).toBe(10); + }); + }); + describe('fetchAllTransactions', () => { it('hits /transactions?all=true and returns 
the data array directly without writing to store', async () => { const data = [{ id: '1' }, { id: '2' }]; diff --git a/tehriehlbudget-frontend/src/stores/transactions.ts b/tehriehlbudget-frontend/src/stores/transactions.ts index 057f408..834a63b 100644 --- a/tehriehlbudget-frontend/src/stores/transactions.ts +++ b/tehriehlbudget-frontend/src/stores/transactions.ts @@ -28,6 +28,24 @@ export interface TransactionFilters { endDate?: string; } +export interface BulkCreateInput { + accountId: string; + destinationAccountId?: string; + categoryId?: string; + amount: number; + type: 'INCOME' | 'EXPENSE' | 'TRANSFER'; + description: string; + notes?: string; + date: string; + externalId?: string; +} + +export interface BulkCreateResult { + created: number; + ids: string[]; + partial?: { attempted: number; failed: number; error: string }; +} + interface TransactionsState { transactions: Transaction[]; total: number; @@ -36,6 +54,10 @@ interface TransactionsState { fetchTransactions: (filters?: TransactionFilters, page?: number) => Promise; fetchAllTransactions: (filters?: TransactionFilters) => Promise; createTransaction: (data: Partial) => Promise; + bulkCreateTransactions: ( + rows: BulkCreateInput[], + source: { kind: 'statement-import'; label: string }, + ) => Promise; updateTransaction: (id: string, data: Partial) => Promise; deleteTransaction: (id: string) => Promise; } @@ -85,6 +107,14 @@ export const useTransactionsStore = create((set) => ({ set((state) => ({ transactions: [transaction, ...state.transactions] })); }, + bulkCreateTransactions: async (rows, source) => { + return api.post('/transactions/bulk', { + transactions: rows, + source: source.kind, + sourceLabel: source.label, + }); + }, + updateTransaction: async (id, data) => { const updated = await api.patch(`/transactions/${id}`, data); set((state) => ({