@inproceedings{nanayakkara2022clinical,
  title = {Clinical dialogue transcription error correction using Seq2Seq models},
  abstract = {Good communication is critical to good healthcare. Clinical dialogue is a conversation between health practitioners and their patients, with the explicit goal of obtaining and sharing medical information. This information contributes to medical decision-making regarding the patient and plays a crucial role in their healthcare journey. The reliance on note-taking and manual scribing processes is extremely inefficient and leads to manual transcription errors when digitizing notes. Automatic Speech Recognition (ASR) plays a significant role in speech-to-text applications and can be used directly as a text generator in conversational applications. However, recording clinical dialogue presents a number of general and domain-specific challenges. In this paper, we present a Seq2Seq learning approach for ASR transcription error correction of clinical dialogues. We introduce a new Gastrointestinal Clinical Dialogue (GCD) dataset, gathered by healthcare professionals from an NHS Inflammatory Bowel Disease clinic, and use it in a comparative study with four commercial ASR systems. Using self-supervision strategies, we fine-tune a Seq2Seq model on a mask-filling task using a domain-specific PubMed dataset, which we have shared publicly for future research. The BART model fine-tuned for mask-filling was able to correct transcription errors and achieve lower word error rates for three out of four commercial ASR outputs.},
  conference = {6th International Workshop on Health Intelligence (W3PHIAI-22), co-located with the AAAI (Association for the Advancement of Artificial Intelligence) 34th Innovative Applications of Artificial Intelligence conference (IAAI-22)},
  doi = {10.1007/978-3-031-14771-5\_4},
  isbn = {9783031147708},
  note = {This version of the contribution has been accepted for publication, after peer review (when applicable) but is not the Version of Record and does not reflect post-acceptance improvements, or any corrections. The Version of Record is available online at: https://doi.org/10.1007/978-3-031-14771-5\_4. Use of this Accepted Version is subject to the publisher's Accepted Manuscript terms of use: https://www.springernature.com/gp/open-research/policies/accepted-manuscript-terms.},
  pages = {41--57},
  publicationstatus = {Published},
  publisher = {Springer},
  url = {https://rgu-repository.worktribe.com/output/1686809},
  keyword = {Artificial Intelligence & Reasoning (AIR), Health & Wellbeing, Living in a Digital World, Clinical dialogue transcription, Automatic speech recognition, Error correction},
  year = {2022},
  author = {Nanayakkara, Gayani and Wiratunga, Nirmalie and Corsar, David and Martin, Kyle and Wijekoon, Anjana},
  editor = {Shaban-Nejad, Arash and Michalowski, Martin and Bianco, Simone}
}