From 7c16182d617cdec3d52ded8ae52ba98f0d92d749 Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:33:32 +0200 Subject: [PATCH 1/8] Do not close input stream in parse method --- .../marvinelsen/cedict/internal/CedictParserImpl.kt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt b/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt index fe06ccf..5afab6e 100644 --- a/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt +++ b/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt @@ -20,11 +20,11 @@ internal class CedictParserImpl : CedictParser { } override fun parseCedict(inputStream: InputStream) = - inputStream.bufferedReader().useLines { lines -> - lines.filterNot(::isComment) - .map(::toCedictEntry) - .toList() - } + inputStream + .bufferedReader() + .readLines() + .filterNot(::isComment) + .map(::toCedictEntry) private fun isComment(line: String) = line[0] == COMMENT_MARKER From 73e627b6bebe2695d9dc39e7af277f771e5087df Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:34:00 +0200 Subject: [PATCH 2/8] Return sequence instead of list from parse method --- src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt | 2 +- .../kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt b/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt index 72aa0b6..300c59b 100644 --- a/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt +++ b/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt @@ -4,7 +4,7 @@ import com.marvinelsen.cedict.internal.CedictParserImpl import java.io.InputStream interface CedictParser { - fun parseCedict(inputStream: InputStream): List + fun parseCedict(inputStream: InputStream): Sequence companion object { val instance: CedictParser by lazy { CedictParserImpl() } diff --git a/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt b/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt index 5afab6e..f64a4a9 100644 --- a/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt +++ b/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt @@ -22,7 +22,7 @@ internal class CedictParserImpl : CedictParser { override fun parseCedict(inputStream: InputStream) = inputStream .bufferedReader() - .readLines() + .lineSequence() .filterNot(::isComment) .map(::toCedictEntry) From 8592f4fe6780401e19e09aefbbcba7450c51bb60 Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:34:23 +0200 Subject: [PATCH 3/8] Bump version --- build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle.kts b/build.gradle.kts index 1194287..906c886 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -5,7 +5,7 @@ plugins { } group = "com.marvinelsen" -version = "1.0.1" +version = "2.0.0" repositories { mavenCentral() From 200cd7d06b2a27fa6bb4681827f67bf204009fa6 Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:36:25 +0200 Subject: [PATCH 4/8] Fix test --- .../com/marvinelsen/cedict/internal/CedictParserImplTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt b/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt index 6af4ff1..ae0faad 100644 --- a/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt +++ b/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt @@ -32,6 +32,6 @@ class CedictParserImplTest : ShouldSpec({ val cedictParser = CedictParserImpl() val cedictEntries = cedictParser.parseCedict(cedictFileStream) - cedictEntries.size shouldBe 122_508 + cedictEntries.toList().size shouldBe 122_508 } }) From 9d3f1ec456fa78567cb6e2044f9f8b04e7f41e16 Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:36:49 +0200 Subject: [PATCH 5/8] Rename CedictParserImpl to RegexCedictParser --- src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt | 4 ++-- .../internal/{CedictParserImpl.kt => RegexCedictParser.kt} | 2 +- .../com/marvinelsen/cedict/internal/CedictParserImplTest.kt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename src/main/kotlin/com/marvinelsen/cedict/internal/{CedictParserImpl.kt => RegexCedictParser.kt} (97%) diff --git a/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt b/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt index 300c59b..154b271 100644 --- a/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt +++ b/src/main/kotlin/com/marvinelsen/cedict/api/CedictParser.kt @@ -1,12 +1,12 @@ package com.marvinelsen.cedict.api -import com.marvinelsen.cedict.internal.CedictParserImpl +import com.marvinelsen.cedict.internal.RegexCedictParser import java.io.InputStream interface CedictParser { fun parseCedict(inputStream: InputStream): Sequence companion object { - val instance: CedictParser by lazy { CedictParserImpl() } + val instance: CedictParser by lazy { RegexCedictParser() } } } diff --git a/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt b/src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt similarity index 97% rename from src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt rename to src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt index f64a4a9..1ed58fb 100644 --- a/src/main/kotlin/com/marvinelsen/cedict/internal/CedictParserImpl.kt +++ b/src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt @@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser import com.marvinelsen.chinese.transliteration.Syllable import java.io.InputStream -internal class CedictParserImpl : CedictParser { +internal class RegexCedictParser : CedictParser { companion object { private const val DEFINITION_SEPARATOR = '/' private const val GLOSS_SEPARATOR = ';' diff --git a/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt b/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt index ae0faad..5293e50 100644 --- a/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt +++ b/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt @@ -9,7 +9,7 @@ import java.util.zip.GZIPInputStream class CedictParserImplTest : ShouldSpec({ should("parse lines correctly") { - val cedictParser = CedictParserImpl() + val cedictParser = RegexCedictParser() val cedictEntry = cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/") @@ -29,7 +29,7 @@ class CedictParserImplTest : ShouldSpec({ val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz" val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath)) - val cedictParser = CedictParserImpl() + val cedictParser = RegexCedictParser() val cedictEntries = cedictParser.parseCedict(cedictFileStream) cedictEntries.toList().size shouldBe 122_508 From ebf608b2fa048b84b4cbee818edc5755ecf248bd Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:37:08 +0200 Subject: [PATCH 6/8] Rename test --- .../{CedictParserImplTest.kt => RegexCedictParserTest.kt} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/test/kotlin/com/marvinelsen/cedict/internal/{CedictParserImplTest.kt => RegexCedictParserTest.kt} (96%) diff --git a/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt b/src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt similarity index 96% rename from src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt rename to src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt index 5293e50..85fa677 100644 --- a/src/test/kotlin/com/marvinelsen/cedict/internal/CedictParserImplTest.kt +++ b/src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt @@ -7,7 +7,7 @@ import io.kotest.core.spec.style.ShouldSpec import io.kotest.matchers.shouldBe import java.util.zip.GZIPInputStream -class CedictParserImplTest : ShouldSpec({ +class RegexCedictParserTest : ShouldSpec({ should("parse lines correctly") { val cedictParser = RegexCedictParser() val cedictEntry = From 9fda246045d5d957a2dc9166af43f05643f499e7 Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:41:26 +0200 Subject: [PATCH 7/8] Close stream properly in test --- .../marvinelsen/cedict/internal/RegexCedictParserTest.kt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt b/src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt index 85fa677..a0bf097 100644 --- a/src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt +++ b/src/test/kotlin/com/marvinelsen/cedict/internal/RegexCedictParserTest.kt @@ -30,8 +30,11 @@ class RegexCedictParserTest : ShouldSpec({ val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath)) val cedictParser = RegexCedictParser() - val cedictEntries = cedictParser.parseCedict(cedictFileStream) - cedictEntries.toList().size shouldBe 122_508 + cedictFileStream.use { + val cedictEntries = cedictParser.parseCedict(cedictFileStream) + + cedictEntries.toList().size shouldBe 122_508 + } } }) From 3f8be7049b01c6e239a88e8b635b9b14214dcd80 Mon Sep 17 00:00:00 2001 From: Marvin Elsen Date: Tue, 24 Sep 2024 10:56:57 +0200 Subject: [PATCH 8/8] Make toCedictDefinitions method more concise --- .../com/marvinelsen/cedict/internal/RegexCedictParser.kt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt b/src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt index 1ed58fb..ae72d1e 100644 --- a/src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt +++ b/src/main/kotlin/com/marvinelsen/cedict/internal/RegexCedictParser.kt @@ -48,9 +48,5 @@ internal class RegexCedictParser : CedictParser { private fun toCedictDefinitions(definitions: String) = definitions .split(DEFINITION_SEPARATOR) - .map { - CedictDefinition( - glosses = it.split(GLOSS_SEPARATOR).map(String::trim) - ) - } + .map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) } }