2018
|
Cotterell, Ryan; Kirov, Christo; Hulden, Mans; Eisner, Jason Quantifying the Trade-off Between Two Types of
Morphological Complexity Inproceedings Proceedings of the Society for Computation in
Linguistics (SCiL), Salt Lake City, 2018. Links | BibTeX
@inproceedings{cotterell-et-al-2018-scil,
  author    = {Ryan Cotterell and Christo Kirov and Mans Hulden and Jason Eisner},
  title     = {Quantifying the Trade-off Between Two Types of
Morphological Complexity},
  booktitle = {Proceedings of the Society for Computation in
Linguistics (SCiL)},
  volume    = {1},
  number    = {30},
  address   = {Salt Lake City},
  year      = {2018},
  date      = {2018-01-01},
  url       = {http://cs.jhu.edu/~jason/papers/#cotterell-et-al-2018-scil},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Moro-Velazquez, Laureano; Gomez-Garcia, Jorge Andres; Godino-Llorente, Juan Ignacio; Villalba, Jesus; Orozco-Arroyave, Juan Rafael; Dehak, Najim Analysis of speaker recognition methodologies and the influence of kinetic changes to automatically detect Parkinson's Disease Journal Article Applied Soft Computing, 62 , pp. 649–666, 2018. BibTeX
@article{moro2018analysis,
  author    = {Laureano Moro-Velazquez and Jorge Andres Gomez-Garcia and Juan Ignacio Godino-Llorente and Jesus Villalba and Juan Rafael Orozco-Arroyave and Najim Dehak},
  title     = {Analysis of speaker recognition methodologies and the influence of kinetic changes to automatically detect Parkinson's Disease},
  journal   = {Applied Soft Computing},
  volume    = {62},
  pages     = {649--666},
  publisher = {Elsevier},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Wu, Shijie; Shapiro, Pamela; Cotterell, Ryan Hard Non-Monotonic Attention for Character-Level Transduction Inproceedings Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 4425–4438, Brussels, Belgium, 2018. Links | BibTeX
@inproceedings{D18-1473,
  author    = {Shijie Wu and Pamela Shapiro and Ryan Cotterell},
  title     = {Hard Non-Monotonic Attention for Character-Level Transduction},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  pages     = {4425--4438},
  address   = {Brussels, Belgium},
  year      = {2018},
  date      = {2018-01-01},
  url       = {http://aclweb.org/anthology/D18-1473},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Moro-Velázquez, Laureano; Gómez-García, J-A; Godino-Llorente, Juan Ignacio; Rusz, Jan; Skodda, Sabine; Orozco-Arroyave, J R; Noth, Elmar; Dehak, Najim Study of the automatic detection of Parkison's Disease based on speaker recognition technologies and allophonic distillation Inproceedings 2018 40th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC), pp. 1404–1407, 2018. BibTeX
% NOTE(review): "Parkison's" in the title is kept exactly as recorded; verify against the published title before correcting.
% Names use the unambiguous "Last, First" form so the compound surname Orozco-Arroyave is not split.
@inproceedings{moro2018study,
  author    = {Moro-Velázquez, Laureano and Gómez-Garc{\'\i}a, J-A and Godino-Llorente, Juan Ignacio and Rusz, Jan and Skodda, Sabine and Orozco-Arroyave, J R and Noth, Elmar and Dehak, Najim},
  title     = {Study of the automatic detection of Parkison's Disease based on speaker recognition technologies and allophonic distillation},
  booktitle = {2018 40th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)},
  pages     = {1404--1407},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Rouhizadeh, Masoud; Hatef, Elham; Dredze, Mark; Chute, Christopher; Kharrazi, Hadi Identifying Social Determinants of Health from Clinical Notes: A Rule-Based Approach Inproceedings AMIA Natural Language Processing Working Group Pre-Symposium, 2018. BibTeX
@inproceedings{Rouhizadeh:2018fq,
  author    = {Masoud Rouhizadeh and Elham Hatef and Mark Dredze and Christopher Chute and Hadi Kharrazi},
  title     = {Identifying Social Determinants of Health from Clinical Notes: A Rule-Based Approach},
  booktitle = {AMIA Natural Language Processing Working Group Pre-Symposium},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Broniatowski, David A; Jamison, Amelia M; Qi, SiHua; AlKulaib, Lulwah; Chen, Tao; Benton, Adrian; Quinn, Sandra C; Dredze, Mark Weaponized Health Communication: Twitter Bots and Russian Trolls Amplify the Vaccine Debate Journal Article American Journal of Public Health (AJPH), 2018. Abstract | BibTeX
% The doi field below is the bare DOI quoted at the end of this record's abstract.
@article{broniatowski:2018a,
  author    = {David A Broniatowski and Amelia M Jamison and SiHua Qi and Lulwah AlKulaib and Tao Chen and Adrian Benton and Sandra C Quinn and Mark Dredze},
  title     = {Weaponized Health Communication: Twitter Bots and Russian Trolls Amplify the Vaccine Debate},
  journal   = {American Journal of Public Health (AJPH)},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.2105/AJPH.2018.304567},
  abstract  = {Objectives. To understand how Twitter bots and trolls (``bots'') promote online health content.
Methods. We compared bots' to average users' rates of vaccine-relevant messages, which we collected online from July 2014 through September 2017. We estimated the likelihood that users were bots, comparing proportions of polarized and antivaccine tweets across user types. We conducted a content analysis of a Twitter hashtag associated with Russian troll activity.
Results. Compared with average users, Russian trolls ($\chi^2$(1) = 102.0; P < .001), sophisticated bots ($\chi^2$(1) = 28.6; P < .001), and ``content polluters'' ($\chi^2$(1) = 7.0; P < .001) tweeted about vaccination at higher rates. Whereas content polluters posted more antivaccine content ($\chi^2$(1) = 11.18; P < .001), Russian trolls amplified both sides. Unidentifiable accounts were more polarized ($\chi^2$(1) = 12.1; P < .001) and antivaccine ($\chi^2$(1) = 35.9; P < .001). Analysis of the Russian troll hashtag showed that its messages were more political and divisive.
Conclusions. Whereas bots that spread malware and unsolicited content disseminated antivaccine messages, Russian trolls promoted discord. Accounts masquerading as legitimate users create false equivalency, eroding public consensus on vaccination.
Public Health Implications. Directly confronting vaccine skeptics enables bots to legitimize the vaccine debate. More research is needed to determine how best to combat bot-driven content. (Am J Public Health. Published online ahead of print August 23, 2018: e1--e7. doi:10.2105/AJPH.2018.304567)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Objectives. To understand how Twitter bots and trolls (``bots'') promote online health content.
Methods. We compared bots' to average users' rates of vaccine-relevant messages, which we collected online from July 2014 through September 2017. We estimated the likelihood that users were bots, comparing proportions of polarized and antivaccine tweets across user types. We conducted a content analysis of a Twitter hashtag associated with Russian troll activity.
Results. Compared with average users, Russian trolls (χ²(1) = 102.0; P < .001), sophisticated bots (χ²(1) = 28.6; P < .001), and “content polluters” (χ²(1) = 7.0; P < .001) tweeted about vaccination at higher rates. Whereas content polluters posted more antivaccine content (χ²(1) = 11.18; P < .001), Russian trolls amplified both sides. Unidentifiable accounts were more polarized (χ²(1) = 12.1; P < .001) and antivaccine (χ²(1) = 35.9; P < .001). Analysis of the Russian troll hashtag showed that its messages were more political and divisive.
Conclusions. Whereas bots that spread malware and unsolicited content disseminated antivaccine messages, Russian trolls promoted discord. Accounts masquerading as legitimate users create false equivalency, eroding public consensus on vaccination.
Public Health Implications. Directly confronting vaccine skeptics enables bots to legitimize the vaccine debate. More research is needed to determine how best to combat bot-driven content. (Am J Public Health. Published online ahead of print August 23, 2018: e1--e7. doi:10.2105/AJPH.2018.304567) |
Benton, Adrian; Dredze, Mark Using Author Embeddings to Improve Tweet Stance Classification Inproceedings EMNLP Workshop on Noisy User-generated Text (W-NUT), 2018. Abstract | BibTeX
@inproceedings{Benton:2018dk,
  author    = {Adrian Benton and Mark Dredze},
  title     = {Using Author Embeddings to Improve Tweet Stance Classification},
  booktitle = {EMNLP Workshop on Noisy User-generated Text (W-NUT)},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {Many social media classification tasks analyze the content of a message, but do not consider the context of the message. For example, in tweet stance classification -- where a tweet is categorized according to a view-point it espouses -- the expressed viewpoint depends on latent beliefs held by the user. In this paper we investigate whether incorporating knowledge about the author can improve tweet stance classification. Furthermore, since author information and embeddings are often unavailable for labeled training examples, we propose a semi-supervised pre-training method to predict user embeddings. Although the neural stance classifiers we learn are often outperformed by a baseline SVM, author embedding pre-training yields improvements over a non-pre-trained neural network on four out of five domains in the SemEval 2016 6A tweet stance classification task. In a tweet gun control stance classification dataset, improvements from pre-training are only apparent when training data is limited.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Many social media classification tasks analyze the content of a message, but do not consider the context of the message. For example, in tweet stance classification -- where a tweet is categorized according to a view-point it espouses -- the expressed viewpoint depends on latent beliefs held by the user. In this paper we investigate whether incorporating knowledge about the author can improve tweet stance classification. Furthermore, since author information and embeddings are often unavailable for labeled training examples, we propose a semi-supervised pre-training method to predict user embeddings. Although the neural stance classifiers we learn are often outperformed by a baseline SVM, author embedding pre-training yields improvements over a non-pre-trained neural network on four out of five domains in the SemEval 2016 6A tweet stance classification task. In a tweet gun control stance classification dataset, improvements from pre-training are only apparent when training data is limited. |
Wood-Doughty, Zachary; Andrews, Nicholas; Dredze, Mark Convolutions Are All You Need (For Classifying Character Sequences) Inproceedings EMNLP Workshop on Noisy User-generated Text (W-NUT), 2018. Abstract | BibTeX
@inproceedings{Wood-Doughty:2018qd,
  author    = {Zachary Wood-Doughty and Nicholas Andrews and Mark Dredze},
  title     = {Convolutions Are All You Need (For Classifying Character Sequences)},
  booktitle = {EMNLP Workshop on Noisy User-generated Text (W-NUT)},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {While recurrent neural networks (RNNs) are widely used for text classification, they demonstrate poor performance and slow convergence when trained on long sequences. When text is modeled as characters instead of words, the longer sequences make RNNs a poor choice. Convolutional neural networks (CNNs), although somewhat less ubiquitous than RNNs, have an internal structure more appropriate for long-distance character dependencies. To better understand how CNNs and RNNs differ in handling long sequences, we use them for text classification tasks in several character-level social media datasets. The CNN models vastly outperform the RNN models in our experiments, suggesting that CNNs are superior to RNNs at learning to classify character-level data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
While recurrent neural networks (RNNs) are widely used for text classification, they demonstrate poor performance and slow convergence when trained on long sequences. When text is modeled as characters instead of words, the longer sequences make RNNs a poor choice. Convolutional neural networks (CNNs), although somewhat less ubiquitous than RNNs, have an internal structure more appropriate for long-distance character dependencies. To better understand how CNNs and RNNs differ in handling long sequences, we use them for text classification tasks in several character-level social media datasets. The CNN models vastly outperform the RNN models in our experiments, suggesting that CNNs are superior to RNNs at learning to classify character-level data. |
Sekara, Vedran; Rutherford, Alex; Mann, Gideon; Dredze, Mark; Adler, Natalia; García-Herranz, Manuel Trends in the Adoption of Corporate Child Labor Policies: An Analysis with Bloomberg Terminal ESG Data Inproceedings Bloomberg Data for Good Exchange, 2018. Abstract | BibTeX
% NOTE(review): the last author was truncated to "Manuel Garc 'i" in the exported record; restored here as Manuel García-Herranz, confirm against the paper.
@inproceedings{Sekara:2018uo,
  author    = {Sekara, Vedran and Rutherford, Alex and Mann, Gideon and Dredze, Mark and Adler, Natalia and Garc{\'\i}a-Herranz, Manuel},
  title     = {Trends in the Adoption of Corporate Child Labor Policies: An Analysis with Bloomberg Terminal ESG Data},
  booktitle = {Bloomberg Data for Good Exchange},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {Over 150 million children worldwide are estimated to be engaged in some form of child labor, with nearly one in every four children between the ages of 5 and 14 engaged in potentially harmful work in the world's poorest countries. Child labor compromises children's physical, mental, social and educational development. It also reinforces cycles of poverty, negatively affecting the ecosystem necessary for business to thrive in a sustainable manner. Against a backdrop of multiple international and national laws against child labor, corporations also adopt policies on child labor. However, new methods of globally dispersed production have made this commitment to sustainability issues across supply chains more challenging. In this work we examine, through the lens of Bloomberg's environmental, social and governance (ESG) and financial data, trends in corporate child labor policies and their relationship to classic economic variables as a first step in understanding sustainability issues across global supply networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Over 150 million children worldwide are estimated to be engaged in some form of child labor, with nearly one in every four children between the ages of 5 and 14 engaged in potentially harmful work in the world's poorest countries. Child labor compromises children's physical, mental, social and educational development. It also reinforces cycles of poverty, negatively affecting the ecosystem necessary for business to thrive in a sustainable manner. Against a backdrop of multiple international and national laws against child labor, corporations also adopt policies on child labor. However, new methods of globally dispersed production have made this commitment to sustainability issues across supply chains more challenging. In this work we examine, through the lens of Bloomberg's environmental, social and governance (ESG) and financial data, trends in corporate child labor policies and their relationship to classic economic variables as a first step in understanding sustainability issues across global supply networks. |
Wood-Doughty, Zachary; Shpitser, Ilya; Dredze, Mark Challenges of Using Text Classifiers for Causal Inference Inproceedings Empirical Methods in Natural Language Processing (EMNLP), 2018. Abstract | BibTeX
% NOTE: the record keyed wood2018challenges further down this list describes the same paper (duplicate under a second key).
% The page range below is taken from that record so this entry carries the complete metadata; cite only one of the two keys.
@inproceedings{Wood-Doughty:2018qe,
  author    = {Zachary Wood-Doughty and Ilya Shpitser and Mark Dredze},
  title     = {Challenges of Using Text Classifiers for Causal Inference},
  booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {4586--4598},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {Causal understanding is essential for many kinds of decision-making, but causal inference from observational data has typically only been applied to structured, low-dimensional datasets. While text classifiers produce low-dimensional outputs, their use in causal inference has not previously been studied. To facilitate causal analyses based on language data, we consider the role that text classifiers can play in causal inference through established modeling mechanisms from the causality literature on missing data and measurement error. We demonstrate how to conduct causal analyses using text classifiers on simulated and Yelp data, and discuss the opportunities and challenges of future work that uses text data in causal inference.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Causal understanding is essential for many kinds of decision-making, but causal inference from observational data has typically only been applied to structured, low-dimensional datasets. While text classifiers produce low-dimensional outputs, their use in causal inference has not previously been studied. To facilitate causal analyses based on language data, we consider the role that text classifiers can play in causal inference through established modeling mechanisms from the causality literature on missing data and measurement error. We demonstrate how to conduct causal analyses using text classifiers on simulated and Yelp data, and discuss the opportunities and challenges of future work that uses text data in causal inference. |
Ayers, John W; Caputi, Theodore L; Nebeker, Camille; Dredze, Mark Don't quote me: reverse identification of research participants in social media studies Journal Article Nature Digital Medicine, 1 (30), 2018. Abstract | BibTeX
% Percent signs in the abstract are escaped (\%) so LaTeX does not treat the rest of the line as a comment when the field is typeset.
@article{Ayers:2018eb,
  author    = {John W Ayers and Theodore L Caputi and Camille Nebeker and Mark Dredze},
  title     = {Don't quote me: reverse identification of research participants in social media studies},
  journal   = {Nature Digital Medicine},
  volume    = {1},
  number    = {30},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {We investigated if participants in social media surveillance studies could be reverse identified by reviewing all articles published on PubMed in 2015 or 2016 with the words ``Twitter'' and either ``read,'' ``coded,'' or ``content'' in the title or abstract. Seventy-two percent (95\% CI: 63--80) of articles quoted at least one participant's tweet and searching for the quoted content led to the participant 84\% (95\% CI: 74--91) of the time. Twenty-one percent (95\% CI: 13--29) of articles disclosed a participant's Twitter username thereby making the participant immediately identifiable. Only one article reported obtaining consent to disclose identifying information and institutional review board (IRB) involvement was mentioned in only 40\% (95\% CI: 31--50) of articles, of which 17\% (95\% CI: 10--25) received IRB-approval and 23\% (95\% CI: 16--32) were deemed exempt. Biomedical publications are routinely including identifiable information by quoting tweets or revealing usernames which, in turn, violates ICMJE ethical standards governing scientific ethics, even though said content is scientifically unnecessary. We propose that authors convey aggregate findings without revealing participants' identities, editors refuse to publish reports that reveal a participant's identity, and IRBs attend to these privacy issues when reviewing studies involving social media data. These strategies together will ensure participants are protected going forward.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
We investigated if participants in social media surveillance studies could be reverse identified by reviewing all articles published on PubMed in 2015 or 2016 with the words ``Twitter'' and either ``read,'' ``coded,'' or ``content'' in the title or abstract. Seventy-two percent (95% CI: 63--80) of articles quoted at least one participant's tweet and searching for the quoted content led to the participant 84% (95% CI: 74--91) of the time. Twenty-one percent (95% CI: 13--29) of articles disclosed a participant's Twitter username thereby making the participant immediately identifiable. Only one article reported obtaining consent to disclose identifying information and institutional review board (IRB) involvement was mentioned in only 40% (95% CI: 31--50) of articles, of which 17% (95% CI: 10--25) received IRB-approval and 23% (95% CI:16--32) were deemed exempt. Biomedical publications are routinely including identifiable information by quoting tweets or revealing usernames which, in turn, violates ICMJE ethical standards governing scientific ethics, even though said content is scientifically unnecessary. We propose that authors convey aggregate findings without revealing participants' identities, editors refuse to publish reports that reveal a participant's identity, and IRBs attend to these privacy issues when reviewing studies involving social media data. These strategies together will ensure participants are protected going forward. |
Lama, Yuki; Chen, Tao; Dredze, Mark; Jamison, Amelia M; Quinn, Sandra C; Broniatowski, David A Discordance Between Human Papillomavirus Twitter Images and Disparities in Human Papillomavirus Risk and Disease in the United States: Mixed-Methods Analysis Journal Article Journal of Medical Internet Research (JMIR), 2018. Abstract | BibTeX
% Percent signs in the abstract are escaped (\%) so LaTeX does not treat the rest of the line as a comment when the field is typeset.
@article{Lama:2018ss,
  author    = {Yuki Lama and Tao Chen and Mark Dredze and Amelia M Jamison and Sandra C Quinn and David A Broniatowski},
  title     = {Discordance Between Human Papillomavirus Twitter Images and Disparities in Human Papillomavirus Risk and Disease in the United States: Mixed-Methods Analysis},
  journal   = {Journal of Medical Internet Research (JMIR)},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {Background: Racial and ethnic minorities are disproportionately affected by human papillomavirus (HPV)-related cancer, many of which could have been prevented with vaccination. Yet, the initiation and completion rates of HPV vaccination remain low among these populations. Given the importance of social media platforms for health communication, we examined US-based HPV images on Twitter. We explored inconsistencies between the demographics represented in HPV images and the populations that experience the greatest burden of HPV-related disease.
Objective: The objective of our study was to observe whether HPV images on Twitter reflect the actual burden of disease by select demographics and determine to what extent Twitter accounts utilized images that reflect the burden of disease in their health communication messages.
Methods: We identified 456 image tweets about HPV that contained faces posted by US users between November 11, 2014 and August 8, 2016. We identified images containing at least one human face and utilized Face++ software to automatically extract the gender, age, and race of each face. We manually annotated the source accounts of these tweets into 3 types as follows: government (38/298, 12.8\%), organizations (161/298, 54.0\%), and individual (99/298, 33.2\%) and topics (news, health, and other) to examine how images varied by message source.
Results: Findings reflected the racial demographics of the US population but not the disease burden (795/1219, 65.22\% white faces; 140/1219, 11.48\% black faces; 71/1219, 5.82\% Asian faces; and 213/1219, 17.47\% racially ambiguous faces). Gender disparities were evident in the image faces; 71.70\% (874/1219) represented female faces, whereas only 27.89\% (340/1219) represented male faces. Among the 11--26 years age group recommended to receive HPV vaccine, HPV images contained more female-only faces (214/616, 34.3\%) than males (37/616, 6.0\%); the remainder of images included both male and female faces (365/616, 59.3\%). Gender and racial disparities were present across different image sources. Faces from government sources were more likely to depict females (n=44) compared with males (n=16). Of male faces, 80\% (12/15) of youth and 100\% (1/1) of adults were white. News organization sources depicted high proportions of white faces (28/38, 97\% of female youth and 12/12, 100\% of adult males). Face++ identified fewer faces compared with manual annotation because of limitations with detecting multiple, small, or blurry faces. Nonetheless, Face++ achieved a high degree of accuracy with respect to gender, race, and age compared with manual annotation.
Conclusions: This study reveals critical differences between the demographics reflected in HPV images and the actual burden of disease. Racial minorities are less likely to appear in HPV images despite higher rates of HPV incidence. Health communication efforts need to represent populations at risk better if we seek to reduce disparities in HPV infection.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Background: Racial and ethnic minorities are disproportionately affected by human papillomavirus (HPV)-related cancer, many of which could have been prevented with vaccination. Yet, the initiation and completion rates of HPV vaccination remain low among these populations. Given the importance of social media platforms for health communication, we examined US-based HPV images on Twitter. We explored inconsistencies between the demographics represented in HPV images and the populations that experience the greatest burden of HPV-related disease.
Objective: The objective of our study was to observe whether HPV images on Twitter reflect the actual burden of disease by select demographics and determine to what extent Twitter accounts utilized images that reflect the burden of disease in their health communication messages.
Methods: We identified 456 image tweets about HPV that contained faces posted by US users between November 11, 2014 and August 8, 2016. We identified images containing at least one human face and utilized Face++ software to automatically extract the gender, age, and race of each face. We manually annotated the source accounts of these tweets into 3 types as follows: government (38/298, 12.8%), organizations (161/298, 54.0%), and individual (99/298, 33.2%) and topics (news, health, and other) to examine how images varied by message source.
Results: Findings reflected the racial demographics of the US population but not the disease burden (795/1219, 65.22% white faces; 140/1219, 11.48% black faces; 71/1219, 5.82% Asian faces; and 213/1219, 17.47% racially ambiguous faces). Gender disparities were evident in the image faces; 71.70% (874/1219) represented female faces, whereas only 27.89% (340/1219) represented male faces. Among the 11-26 years age group recommended to receive HPV vaccine, HPV images contained more female-only faces (214/616, 34.3%) than males (37/616, 6.0%); the remainder of images included both male and female faces (365/616, 59.3%). Gender and racial disparities were present across different image sources. Faces from government sources were more likely to depict females (n=44) compared with males (n=16). Of male faces, 80% (12/15) of youth and 100% (1/1) of adults were white. News organization sources depicted high proportions of white faces (28/38, 97% of female youth and 12/12, 100% of adult males). Face++ identified fewer faces compared with manual annotation because of limitations with detecting multiple, small, or blurry faces. Nonetheless, Face++ achieved a high degree of accuracy with respect to gender, race, and age compared with manual annotation.
Conclusions: This study reveals critical differences between the demographics reflected in HPV images and the actual burden of disease. Racial minorities are less likely to appear in HPV images despite higher rates of HPV incidence. Health communication efforts need to represent populations at risk better if we seek to reduce disparities in HPV infection. |
Smith, Katherine; Weiger, Caitlin; Fields, Errol; Cohen, Joanna E; Moran, Meghan; Dredze, Mark Conducting public health surveillance research on consumer product websites Inproceedings American Public Health Association (APHA), 2018. BibTeX
@inproceedings{Smith:2018jl,
  author    = {Katherine Smith and Caitlin Weiger and Errol Fields and Joanna E Cohen and Meghan Moran and Mark Dredze},
  title     = {Conducting public health surveillance research on consumer product websites},
  booktitle = {American Public Health Association (APHA)},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Zhou, Yuchen; Dredze, Mark; Broniatowski, David A; Adler, William Gab: The Alt-Right Social Media Platform Inproceedings International Conference on Social Computing, Behavioral-Cultural Modeling & Prediction and Behavior Representation in Modeling and Simulation (SBP-BRiMS), 2018. Abstract | BibTeX
% The ampersand in booktitle is escaped (\&): a bare & is a LaTeX alignment character and breaks typesetting of the reference.
@inproceedings{Zhou:2018uk,
  author    = {Yuchen Zhou and Mark Dredze and David A Broniatowski and William Adler},
  title     = {Gab: The Alt-Right Social Media Platform},
  booktitle = {International Conference on Social Computing, Behavioral-Cultural Modeling \& Prediction and Behavior Representation in Modeling and Simulation (SBP-BRiMS)},
  year      = {2018},
  date      = {2018-01-01},
  abstract  = {This study proposes the use of Gab as a vehicle for political science research regarding modern American politics and the Alt-Right population. We collect several million Gab messages posted on Gab website from August 2016 to February 2018. We conduct a preliminary analysis of Gab platform related to site use, growth and topics, which shows that Gab is a reasonable resource for Alt-Right study.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
This study proposes the use of Gab as a vehicle for political science research regarding modern American politics and the Alt-Right population. We collect several million Gab messages posted on Gab website from August 2016 to February 2018. We conduct a preliminary analysis of Gab platform related to site use, growth and topics, which shows that Gab is a reasonable resource for Alt-Right study. |
Wolfe, Travis; Carrell, Annabelle; Dredze, Mark; Van Durme, Benjamin Summarizing Entities using Distantly Supervised Information Extractors Inproceedings SIGIR Workshop on Knowledge Graphs and Semantics for Text Retrieval, Analysis, and Understanding (KG4IR), 2018. BibTeX
% Names use the "Last, First" form: written as "Benjamin Van Durme", BibTeX takes only "Durme" as the surname because "Van" is capitalised.
@inproceedings{Wolfe:2018il,
  author    = {Wolfe, Travis and Carrell, Annabelle and Dredze, Mark and Van Durme, Benjamin},
  title     = {Summarizing Entities using Distantly Supervised Information Extractors},
  booktitle = {SIGIR Workshop on Knowledge Graphs and Semantics for Text Retrieval, Analysis, and Understanding (KG4IR)},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Hammond, Alexis S; Paul, Michael J; Hobelmann, Gregory J; Koratana, Animesh R; Dredze, Mark; Chisolm, Margaret S Perceived Attitudes About Substance Use in Anonymous Social Media Posts Near College Campuses Journal Article Journal of Medical Internet Research Mental Health (JMIR MH), 2018. BibTeX
@article{hammond:2018lq,
  author    = {Alexis S Hammond and Michael J Paul and Gregory J Hobelmann and Animesh R Koratana and Mark Dredze and Margaret S Chisolm},
  title     = {Perceived Attitudes About Substance Use in Anonymous Social Media Posts Near College Campuses},
  journal   = {Journal of Medical Internet Research Mental Health (JMIR MH)},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Wood-Doughty, Zach; Shpitser, Ilya; Dredze, Mark Challenges of Using Text Classifiers for Causal Inference Inproceedings Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 4586–4598, 2018. BibTeX
% NOTE: same paper as the record keyed Wood-Doughty:2018qe earlier in this list (duplicate under a second key); cite only one of the two.
@inproceedings{wood2018challenges,
  author    = {Zach Wood-Doughty and Ilya Shpitser and Mark Dredze},
  title     = {Challenges of Using Text Classifiers for Causal Inference},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  pages     = {4586--4598},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Wood-Doughty, Zachary; Mahajan, Praateek; Dredze, Mark Johns Hopkins or johnny-hopkins: Classifying Individuals versus Organizations on Twitter Inproceedings NAACL Workshop on Computational Modeling of People's Opinions, Personality, and Emotions in Social Media, 2018. BibTeX
@inproceedings{Wood-Doughty:2018:peoples2,
  author    = {Zachary Wood-Doughty and Praateek Mahajan and Mark Dredze},
  title     = {Johns Hopkins or johnny-hopkins: Classifying Individuals versus Organizations on Twitter},
  booktitle = {NAACL Workshop on Computational Modeling of People's Opinions, Personality, and Emotions in Social Media},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Wood-Doughty, Zachary; Andrews, Nicholas; Marvin, Rebecca; Dredze, Mark Predicting Twitter User Demographics from Names Alone Inproceedings NAACL Workshop on Computational Modeling of People's Opinions, Personality, and Emotions in Social Media, 2018. BibTeX @inproceedings{Wood-Doughty:2018:peoples1,
  title = {Predicting {Twitter} User Demographics from Names Alone},
  author = {Wood-Doughty, Zachary and Andrews, Nicholas and Marvin, Rebecca and Dredze, Mark},
  year = {2018},
  date = {2018-01-01},
  booktitle = {NAACL Workshop on Computational Modeling of People's Opinions, Personality, and Emotions in Social Media},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Caputi, Theodore L; Leas, Eric C; Dredze, Mark; Ayers, John W Online Sales of Marijuana: An Unrecognized Public Health Dilemma Journal Article American Journal of Preventive Medicine (AJPM), 2018. BibTeX @article{Caputi:2018dk,
  title = {Online Sales of Marijuana: An Unrecognized Public Health Dilemma},
  author = {Caputi, Theodore L and Leas, Eric C and Dredze, Mark and Ayers, John W},
  year = {2018},
  date = {2018-01-01},
  journal = {American Journal of Preventive Medicine (AJPM)},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
|
Benton, Adrian; Dredze, Mark Deep Dirichlet Multinomial Regression Inproceedings North American Chapter of the Association for Computational Linguistics (NAACL), 2018. BibTeX @inproceedings{Benton:2018dn,
  title = {Deep {Dirichlet} Multinomial Regression},
  author = {Benton, Adrian and Dredze, Mark},
  year = {2018},
  date = {2018-01-01},
  booktitle = {North American Chapter of the Association for Computational Linguistics (NAACL)},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Hadian, Hossein; Sameti, Hossein; Povey, Daniel; Khudanpur, Sanjeev End-to-end Speech Recognition Using Lattice-free MMI Inproceedings Proc. Interspeech 2018, pp. 12–16, 2018. BibTeX @inproceedings{unknownb,
  title = {End-to-end Speech Recognition Using Lattice-free {MMI}},
  author = {Hadian, Hossein and Sameti, Hossein and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {12--16},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Chen, Tao; Dredze, Mark Vaccine Images on Twitter: What is Shared and Why Journal Article Journal of Medical Internet Research (JMIR), 2018. BibTeX @article{Chen:2018tg,
  title = {Vaccine Images on {Twitter}: What is Shared and Why},
  author = {Chen, Tao and Dredze, Mark},
  year = {2018},
  date = {2018-01-01},
  journal = {Journal of Medical Internet Research (JMIR)},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
|
Povey, Daniel; Hadian, Hossein; Ghahremani, Pegah; Li, Ke; Khudanpur, Sanjeev A Time-Restricted Self-Attention Layer for ASR Inproceedings 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5874–5878, 2018. BibTeX @inproceedings{Povey2018ATS,
  title = {A Time-Restricted Self-Attention Layer for {ASR}},
  author = {Povey, Daniel and Hadian, Hossein and Ghahremani, Pegah and Li, Ke and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages = {5874--5878},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Snyder, David; Garcia-Romero, Daniel; Sell, Gregory; Povey, Daniel; Khudanpur, Sanjeev X-vectors: Robust DNN embeddings for speaker recognition Inproceedings 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5329–5333, 2018. BibTeX @inproceedings{snyder2018x,
  title = {X-vectors: Robust {DNN} embeddings for speaker recognition},
  author = {Snyder, David and Garcia-Romero, Daniel and Sell, Gregory and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages = {5329--5333},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Manohar, Vimal; Hadian, Hossein; Povey, Daniel; Khudanpur, Sanjeev Semi-supervised training of acoustic models using lattice-free MMI Inproceedings 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4844–4848, IEEE 2018. BibTeX @inproceedings{manohar2018semi,
  title = {Semi-supervised training of acoustic models using lattice-free {MMI}},
  author = {Manohar, Vimal and Hadian, Hossein and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages = {4844--4848},
  organization = {IEEE},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Watanabe, Shinji; Virtanen, Tuomas; Kolossa, Dorothea Application of Source Separation to Robust Speech Analysis and Recognition Book Chapter Audio Source Separation and Speech Enhancement, Chapter 17, pp. 377–411, Wiley-Blackwell, 2018, ISBN: 9781119279860. Abstract | Links | BibTeX @inbook{doi:10.1002/9781119279860.ch17,
  title = {Application of Source Separation to Robust Speech Analysis and Recognition},
  author = {Watanabe, Shinji and Virtanen, Tuomas and Kolossa, Dorothea},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/9781119279860.ch17},
  doi = {10.1002/9781119279860.ch17},
  isbn = {9781119279860},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Audio Source Separation and Speech Enhancement},
  pages = {377--411},
  publisher = {Wiley-Blackwell},
  chapter = {17},
  abstract = {This chapter describes applications of source separation techniques to robust speech analysis and recognition, including automatic speech recognition, speaker/language identification, emotion and paralinguistic analysis, and audiovisual analysis. These are the most successful applications in audio and speech processing with various commercial products. However, the robustness against noise or non-target speech still remains a challenging issue, and source separation and speech enhancement techniques are gathering large attention in the community. This chapter systematically describes how source separation and speech enhancement techniques are applied to improve the robustness of these applications.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inbook}
}
This chapter describes applications of source separation techniques to robust speech analysis and recognition, including automatic speech recognition, speaker/language identification, emotion and paralinguistic analysis, and audiovisual analysis. These are the most successful applications in audio and speech processing with various commercial products. However, the robustness against noise or non-target speech still remains a challenging issue, and source separation and speech enhancement techniques are gathering large attention in the community. This chapter systematically describes how source separation and speech enhancement techniques are applied to improve the robustness of these applications. |
Snyder, David; Garcia-Romero, Daniel; McCree, Alan; Sell, Gregory; Povey, Daniel; Khudanpur, Sanjeev Spoken language recognition using x-vectors Inproceedings Odyssey: The Speaker and Language Recognition Workshop, Les Sables d’Olonne, 2018. BibTeX @inproceedings{snyder2018spoken,
  title = {Spoken language recognition using x-vectors},
  author = {Snyder, David and Garcia-Romero, Daniel and McCree, Alan and Sell, Gregory and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Odyssey: The Speaker and Language Recognition Workshop, Les Sables d’Olonne},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Chen, Zhehuai; Luitjens, Justin; Xu, Hainan; Wang, Yiming; Povey, Daniel; Khudanpur, Sanjeev A GPU-based WFST Decoder with Exact Lattice Generation Journal Article arXiv preprint arXiv:1804.03243, 2018. BibTeX @article{chen2018gpu,
  title = {A {GPU}-based {WFST} Decoder with Exact Lattice Generation},
  author = {Chen, Zhehuai and Luitjens, Justin and Xu, Hainan and Wang, Yiming and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  journal = {arXiv preprint arXiv:1804.03243},
  eprint = {1804.03243},
  archiveprefix = {arXiv},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
|
Povey, Daniel; Cheng, Gaofeng; Wang, Yiming; Li, Ke; Xu, Hainan; Yarmohammadi, Mahsa; Khudanpur, Sanjeev Semi-orthogonal low-rank matrix factorization for deep neural networks Inproceedings Proc. Interspeech 2018, pp. 3743–3747, 2018. BibTeX @inproceedings{povey2018semi,
  title = {Semi-orthogonal low-rank matrix factorization for deep neural networks},
  author = {Povey, Daniel and Cheng, Gaofeng and Wang, Yiming and Li, Ke and Xu, Hainan and Yarmohammadi, Mahsa and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {3743--3747},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Seki, Hiroshi; Hori, Takaaki; Watanabe, Shinji; Le Roux, Jonathan; Hershey, John R A Purely End-to-End System for Multi-speaker Speech Recognition Inproceedings Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2620–2630, Melbourne, Australia, 2018. Links | BibTeX @inproceedings{P18-1244,
  title = {A Purely End-to-End System for Multi-speaker Speech Recognition},
  author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John R},
  url = {http://aclweb.org/anthology/P18-1244},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages = {2620--2630},
  address = {Melbourne, Australia},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Zhu, Yingke; Ko, Tom; Snyder, David; Mak, Brian; Povey, Daniel Self-Attentive Speaker Embeddings for Text-Independent Speaker Verification Inproceedings Proc. Interspeech 2018, pp. 3573–3577, 2018. BibTeX @inproceedings{zhu2018self,
  title = {Self-Attentive Speaker Embeddings for Text-Independent Speaker Verification},
  author = {Zhu, Yingke and Ko, Tom and Snyder, David and Mak, Brian and Povey, Daniel},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {3573--3577},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Li, Ke; Xu, Hainan; Wang, Yiming; Povey, Daniel; Khudanpur, Sanjeev Recurrent Neural Network Language Model Adaptation for Conversational Speech Recognition Inproceedings Proc. Interspeech 2018, pp. 3373–3377, 2018. BibTeX @inproceedings{li2018recurrent,
  title = {Recurrent Neural Network Language Model Adaptation for Conversational Speech Recognition},
  author = {Li, Ke and Xu, Hainan and Wang, Yiming and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {3373--3377},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Ghahremani, Pegah; Hadian, Hossein; Lv, Hang; Povey, Daniel; Khudanpur, Sanjeev Acoustic Modeling from Frequency Domain Representations of Speech Inproceedings Proc. Interspeech 2018, pp. 1596–1600, 2018. BibTeX @inproceedings{ghahremani2018acoustic,
  title = {Acoustic Modeling from Frequency Domain Representations of Speech},
  author = {Ghahremani, Pegah and Hadian, Hossein and Lv, Hang and Povey, Daniel and Khudanpur, Sanjeev},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {1596--1600},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Ghahremani, Pegah; Nidadavolu, Phani Sankar; Chen, Nanxin; Villalba, Jesús; Povey, Daniel; Khudanpur, Sanjeev; Dehak, Najim End-to-End Deep Neural Network Age Estimation Inproceedings Proc. Interspeech 2018, pp. 277–281, 2018. BibTeX @inproceedings{ghahremani2018end,
  title = {End-to-End Deep Neural Network Age Estimation},
  author = {Ghahremani, Pegah and Nidadavolu, Phani Sankar and Chen, Nanxin and Villalba, Jes{\'u}s and Povey, Daniel and Khudanpur, Sanjeev and Dehak, Najim},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {277--281},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Sell, Gregory; Snyder, David; McCree, Alan; Garcia-Romero, Daniel; Villalba, Jesús; Maciejewski, Matthew; Manohar, Vimal; Dehak, Najim; Povey, Daniel; Watanabe, Shinji; et al. Diarization is Hard: Some Experiences and Lessons Learned for the JHU Team in the Inaugural DIHARD Challenge Inproceedings Proc. Interspeech 2018, pp. 2808–2812, 2018. BibTeX @inproceedings{sell2018diarization,
  title = {Diarization is Hard: Some Experiences and Lessons Learned for the {JHU} Team in the Inaugural {DIHARD} Challenge},
  author = {Sell, Gregory and Snyder, David and McCree, Alan and Garcia-Romero, Daniel and Villalba, Jes{\'u}s and Maciejewski, Matthew and Manohar, Vimal and Dehak, Najim and Povey, Daniel and Watanabe, Shinji and others},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {2808--2812},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Sarma, Mousmita; Ghahremani, Pegah; Povey, Daniel; Goel, Nagendra Kumar; Sarma, Kandarpa Kumar; Dehak, Najim Emotion Identification from raw speech signals using DNNs Inproceedings Proc. Interspeech 2018, pp. 3097–3101, 2018. BibTeX @inproceedings{sarma2018emotion,
  title = {Emotion Identification from raw speech signals using {DNNs}},
  author = {Sarma, Mousmita and Ghahremani, Pegah and Povey, Daniel and Goel, Nagendra Kumar and Sarma, Kandarpa Kumar and Dehak, Najim},
  year = {2018},
  date = {2018-01-01},
  booktitle = {Proc. Interspeech 2018},
  pages = {3097--3101},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
2017
|
Zhang, Sheng; Duh, Kevin; Van Durme, Benjamin Selective Decoding for Cross-lingual Open Information Extraction Inproceedings Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 832–842, Asian Federation of Natural Language Processing, 2017. Links | BibTeX @inproceedings{zhang17selective,
  title = {Selective Decoding for Cross-lingual Open Information Extraction},
  author = {Zhang, Sheng and Duh, Kevin and Van Durme, Benjamin},
  url = {http://www.aclweb.org/anthology/I17-1084},
  year = {2017},
  date = {2017-11-01},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  pages = {832--842},
  publisher = {Asian Federation of Natural Language Processing},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Shen, Yelong; Liu, Xiaodong; Duh, Kevin; Gao, Jianfeng An Empirical Analysis of Multiple-Turn Reasoning Strategies in Reading Comprehension Tasks Inproceedings Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 957–966, Asian Federation of Natural Language Processing, 2017. Links | BibTeX @inproceedings{shen17reasoning,
  title = {An Empirical Analysis of Multiple-Turn Reasoning Strategies in Reading Comprehension Tasks},
  author = {Shen, Yelong and Liu, Xiaodong and Duh, Kevin and Gao, Jianfeng},
  url = {http://www.aclweb.org/anthology/I17-1096},
  year = {2017},
  date = {2017-11-01},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  pages = {957--966},
  publisher = {Asian Federation of Natural Language Processing},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
White, Aaron Steven; Rastogi, Pushpendre; Duh, Kevin; Van Durme, Benjamin Inference is Everything: Recasting Semantic Resources into a Unified Evaluation Framework Inproceedings Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 996–1005, Asian Federation of Natural Language Processing, 2017. Links | BibTeX @inproceedings{white17inference,
  title = {Inference is Everything: Recasting Semantic Resources into a Unified Evaluation Framework},
  author = {White, Aaron Steven and Rastogi, Pushpendre and Duh, Kevin and Van Durme, Benjamin},
  url = {http://www.aclweb.org/anthology/I17-1100},
  year = {2017},
  date = {2017-11-01},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  pages = {996--1005},
  publisher = {Asian Federation of Natural Language Processing},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Khayrallah, Huda; Kumar, Gaurav; Duh, Kevin; Post, Matt; Koehn, Philipp Neural Lattice Search for Domain Adaptation in Machine Translation Inproceedings Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 20–25, Asian Federation of Natural Language Processing, 2017. Links | BibTeX @inproceedings{khayrallah17adapt,
  title = {Neural Lattice Search for Domain Adaptation in Machine Translation},
  author = {Khayrallah, Huda and Kumar, Gaurav and Duh, Kevin and Post, Matt and Koehn, Philipp},
  url = {http://www.aclweb.org/anthology/I17-2004},
  year = {2017},
  date = {2017-11-01},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  pages = {20--25},
  publisher = {Asian Federation of Natural Language Processing},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Cotterell, Ryan; Duh, Kevin Low-Resource Named Entity Recognition with Cross-lingual, Character-Level Neural Conditional Random Fields Inproceedings Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 91–96, Asian Federation of Natural Language Processing, 2017. Links | BibTeX @inproceedings{cotterell17ner,
  title = {Low-Resource Named Entity Recognition with Cross-lingual, Character-Level Neural Conditional Random Fields},
  author = {Cotterell, Ryan and Duh, Kevin},
  url = {http://www.aclweb.org/anthology/I17-2016},
  year = {2017},
  date = {2017-11-01},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  pages = {91--96},
  publisher = {Asian Federation of Natural Language Processing},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Wang, Dingquan; Peng, Nanyun; Duh, Kevin A Multi-task Learning Approach to Adapting Bilingual Word Embeddings for Cross-lingual Named Entity Recognition Inproceedings Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 383–388, Asian Federation of Natural Language Processing, 2017. Links | BibTeX @inproceedings{wang17multitask,
  title = {A Multi-task Learning Approach to Adapting Bilingual Word Embeddings for Cross-lingual Named Entity Recognition},
  author = {Wang, Dingquan and Peng, Nanyun and Duh, Kevin},
  url = {http://www.aclweb.org/anthology/I17-2065},
  year = {2017},
  date = {2017-11-01},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  pages = {383--388},
  publisher = {Asian Federation of Natural Language Processing},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Xu, Hainan; Koehn, Philipp Zipporah: a Fast and Scalable Data Cleaning System for Noisy Web-Crawled Parallel Corpora Inproceedings Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 2935–2940, Association for Computational Linguistics, Copenhagen, Denmark, 2017. Abstract | Links | BibTeX @inproceedings{xu-koehn:2017:EMNLP2017,
  title = {Zipporah: a Fast and Scalable Data Cleaning System for Noisy Web-Crawled Parallel Corpora},
  author = {Xu, Hainan and Koehn, Philipp},
  url = {https://www.aclweb.org/anthology/D17-1318},
  year = {2017},
  date = {2017-09-01},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  pages = {2935--2940},
  publisher = {Association for Computational Linguistics},
  address = {Copenhagen, Denmark},
  abstract = {We introduce Zipporah, a fast and scalable data cleaning system. We propose a
novel type of bag-of-words translation feature, and train logistic regression
models to classify good data and synthetic noisy data in the proposed feature
space. The trained model is used to score parallel sentences in the data pool
for selection. As shown in experiments, Zipporah selects a high-quality
parallel corpus from a large, mixed quality data pool. In particular, for one
noisy dataset, Zipporah achieves a 2.1 BLEU score improvement with using 1/5 of
the data over using the entire corpus.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
We introduce Zipporah, a fast and scalable data cleaning system. We propose a
novel type of bag-of-words translation feature, and train logistic regression
models to classify good data and synthetic noisy data in the proposed feature
space. The trained model is used to score parallel sentences in the data pool
for selection. As shown in experiments, Zipporah selects a high-quality
parallel corpus from a large, mixed quality data pool. In particular, for one
noisy dataset, Zipporah achieves a 2.1 BLEU score improvement with using 1/5 of
the data over using the entire corpus. |
Bojar, Ondřej; Chatterjee, Rajen; Federmann, Christian; Graham, Yvette; Haddow, Barry; Huang, Shujian; Huck, Matthias; Koehn, Philipp; Liu, Qun; Logacheva, Varvara; Monz, Christof; Negri, Matteo; Post, Matt; Rubino, Raphael; Specia, Lucia; Turchi, Marco Findings of the 2017 Conference on Machine Translation (WMT17) Inproceedings Proceedings of the Second Conference on Machine Translation, pp. 169–214, Association for Computational Linguistics, Copenhagen, Denmark, 2017. Links | BibTeX @inproceedings{bojar-EtAl:2017:WMT1,
  title = {Findings of the 2017 Conference on Machine Translation ({WMT17})},
  author = {Bojar, Ondřej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huang, Shujian and Huck, Matthias and Koehn, Philipp and Liu, Qun and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Post, Matt and Rubino, Raphael and Specia, Lucia and Turchi, Marco},
  url = {http://www.aclweb.org/anthology/W17-4717},
  year = {2017},
  date = {2017-09-01},
  booktitle = {Proceedings of the Second Conference on Machine Translation},
  pages = {169--214},
  publisher = {Association for Computational Linguistics},
  address = {Copenhagen, Denmark},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Nadejde, Maria; Reddy, Siva; Sennrich, Rico; Dwojak, Tomasz; Junczys-Dowmunt, Marcin; Koehn, Philipp; Birch, Alexandra Predicting Target Language CCG Supertags Improves Neural Machine Translation Inproceedings Proceedings of the Second Conference on Machine Translation, pp. 68–79, Association for Computational Linguistics, Copenhagen, Denmark, 2017. Links | BibTeX @inproceedings{nadejde-EtAl:2017:WMT,
  title = {Predicting Target Language {CCG} Supertags Improves Neural Machine Translation},
  author = {Nadejde, Maria and Reddy, Siva and Sennrich, Rico and Dwojak, Tomasz and Junczys-Dowmunt, Marcin and Koehn, Philipp and Birch, Alexandra},
  url = {http://www.aclweb.org/anthology/W17-4707},
  year = {2017},
  date = {2017-09-01},
  booktitle = {Proceedings of the Second Conference on Machine Translation},
  pages = {68--79},
  publisher = {Association for Computational Linguistics},
  address = {Copenhagen, Denmark},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Ding, Shuoyang; Khayrallah, Huda; Koehn, Philipp; Post, Matt; Kumar, Gaurav; Duh, Kevin The JHU Machine Translation Systems for WMT 2017 Inproceedings Proceedings of the Second Conference on Machine Translation, pp. 276–282, Association for Computational Linguistics, Copenhagen, Denmark, 2017. Links | BibTeX @inproceedings{ding-EtAl:2017:WMT,
  title = {The {JHU} Machine Translation Systems for {WMT} 2017},
  author = {Ding, Shuoyang and Khayrallah, Huda and Koehn, Philipp and Post, Matt and Kumar, Gaurav and Duh, Kevin},
  url = {http://www.aclweb.org/anthology/W17-4724},
  year = {2017},
  date = {2017-09-01},
  booktitle = {Proceedings of the Second Conference on Machine Translation},
  pages = {276--282},
  publisher = {Association for Computational Linguistics},
  address = {Copenhagen, Denmark},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Koehn, Philipp; Knowles, Rebecca Six Challenges for Neural Machine Translation Inproceedings Proceedings of the First Workshop on Neural Machine Translation, pp. 28–39, Association for Computational Linguistics, Vancouver, 2017. Abstract | Links | BibTeX @inproceedings{koehn-knowles:2017:NMT,
  title = {Six Challenges for Neural Machine Translation},
  author = {Koehn, Philipp and Knowles, Rebecca},
  url = {http://www.aclweb.org/anthology/W17-3204},
  year = {2017},
  date = {2017-08-01},
  booktitle = {Proceedings of the First Workshop on Neural Machine Translation},
  pages = {28--39},
  publisher = {Association for Computational Linguistics},
  address = {Vancouver},
  abstract = {We explore six challenges for neural machine translation: domain mismatch,
amount of training data, rare words, long sentences, word alignment, and beam
search. We show both deficiencies and improvements over the quality of
phrase-based statistical machine translation.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
We explore six challenges for neural machine translation: domain mismatch,
amount of training data, rare words, long sentences, word alignment, and beam
search. We show both deficiencies and improvements over the quality of
phrase-based statistical machine translation. |
Renduchintala, Adithya; Koehn, Philipp; Eisner, Jason Knowledge Tracing in Sequential Learning of Inflected Vocabulary Inproceedings Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017), pp. 238–247, Association for Computational Linguistics, Vancouver, Canada, 2017. Abstract | Links | BibTeX @inproceedings{renduchintala-koehn-eisner:2017:CoNLL,
  title = {Knowledge Tracing in Sequential Learning of Inflected Vocabulary},
  author = {Renduchintala, Adithya and Koehn, Philipp and Eisner, Jason},
  url = {http://aclweb.org/anthology/K17-1025},
  year = {2017},
  date = {2017-08-01},
  booktitle = {Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017)},
  pages = {238--247},
  publisher = {Association for Computational Linguistics},
  address = {Vancouver, Canada},
  abstract = {We present a feature-rich knowledge tracing method that captures a student's
acquisition and retention of knowledge during a foreign language phrase
learning task. We model the student's behavior as making predictions under a
log-linear model, and adopt a neural gating mechanism to model how the student
updates their log-linear parameters in response to feedback. The gating
mechanism allows the model to learn complex patterns of retention and
acquisition for each feature, while the log-linear parameterization results in
an interpretable knowledge state. We collect human data and evaluate several
versions of the model.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
We present a feature-rich knowledge tracing method that captures a student's
acquisition and retention of knowledge during a foreign language phrase
learning task. We model the student's behavior as making predictions under a
log-linear model, and adopt a neural gating mechanism to model how the student
updates their log-linear parameters in response to feedback. The gating
mechanism allows the model to learn complex patterns of retention and
acquisition for each feature, while the log-linear parameterization results in
an interpretable knowledge state. We collect human data and evaluate several
versions of the model. |
Dredze, Mark; Wood-Doughty, Zachary; Quinn, Sandra Crouse; Broniatowski, David A Vaccine opponents' use of Twitter during the 2016 US presidential election: Implications for practice and policy Journal Article Vaccine, 2017. Abstract | BibTeX @article{Dredze:2017fv,
  title = {Vaccine opponents' use of {Twitter} during the 2016 {US} presidential election: Implications for practice and policy},
  author = {Dredze, Mark and Wood-Doughty, Zachary and Quinn, Sandra Crouse and Broniatowski, David A},
  year = {2017},
  date = {2017-07-01},
  journal = {Vaccine},
  abstract = {The recent inauguration of President Trump carries with it many public health policy implications. During the election, President Trump, like all political candidates, made policy commitments to various interest groups including vaccine skeptics. These groups celebrated the announcement that Robert Kennedy Jr., a noted proponent of a causal link between vaccines and autism, may chair a commission on vaccines. Furthermore, during the GOP primaries, Mr. Trump endorsed messages associated with vaccine refusal on Twitter, and met with prominent vaccine refusal advocates including Andrew Wakefield, who published the retracted and discredited 1998 Lancet article claiming to link autism to MMR vaccination. In this paper, we show that the new administration has mobilized vaccine refusal advocates, potentially enabling them to influence the national agenda in a manner that could lead to changes in existing vaccination policy.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
The recent inauguration of President Trump carries with it many public health policy implications. During the election, President Trump, like all political candidates, made policy commitments to various interest groups including vaccine skeptics. These groups celebrated the announcement that Robert Kennedy Jr., a noted proponent of a causal link between vaccines and autism, may chair a commission on vaccines. Furthermore, during the GOP primaries, Mr. Trump endorsed messages associated with vaccine refusal on Twitter, and met with prominent vaccine refusal advocates including Andrew Wakefield, who published the retracted and discredited 1998 Lancet article claiming to link autism to MMR vaccination. In this paper, we show that the new administration has mobilized vaccine refusal advocates, potentially enabling them to influence the national agenda in a manner that could lead to changes in existing vaccination policy. |