Publications | Lu Lin

A full, up-to-date list is available on Google Scholar.

Preprints

2024

arXiv

AdvI2I: Adversarial Image Attack on Image-to-Image Diffusion models

Yaopei Zeng, Yuanpu Cao, Bochuan Cao, Yurui Chang, Jinghui Chen, and Lu Lin

arXiv preprint arXiv:2410.21471, 2024

@article{zeng2024advi2i,
  author        = {Zeng, Yaopei and Cao, Yuanpu and Cao, Bochuan and Chang, Yurui and Chen, Jinghui and Lin, Lu},
  title         = {{AdvI2I}: Adversarial Image Attack on Image-to-Image Diffusion models},
  journal       = {arXiv preprint arXiv:2410.21471},
  eprint        = {2410.21471},
  archiveprefix = {arXiv},
  year          = {2024},
  category      = {preprint},
}

arXiv

Mitigating Graph Covariate Shift via Score-based Out-of-distribution Augmentation

Bohan Wang, Yurui Chang, and Lu Lin

arXiv preprint arXiv:2410.17506, 2024

Bib HTML

@article{wang2024mitigating,
  author        = {Wang, Bohan and Chang, Yurui and Lin, Lu},
  title         = {Mitigating Graph Covariate Shift via Score-based Out-of-distribution Augmentation},
  journal       = {arXiv preprint arXiv:2410.17506},
  eprint        = {2410.17506},
  archiveprefix = {arXiv},
  year          = {2024},
  category      = {preprint},
}

arXiv

XPrompt: Explaining Large Language Model’s Generation via Joint Prompt Attribution

Yurui Chang, Bochuan Cao, Yujia Wang, Jinghui Chen, and Lu Lin

arXiv preprint arXiv:2405.20404, 2024

Bib HTML

@article{chang2024xprompt,
  author        = {Chang, Yurui and Cao, Bochuan and Wang, Yujia and Chen, Jinghui and Lin, Lu},
  title         = {{XPrompt}: Explaining Large Language Model's Generation via Joint Prompt Attribution},
  journal       = {arXiv preprint arXiv:2405.20404},
  eprint        = {2405.20404},
  archiveprefix = {arXiv},
  year          = {2024},
  category      = {preprint},
}

arXiv

WordGame: Efficient & Effective LLM Jailbreak via Simultaneous Obfuscation in Query and Response

Tianrong Zhang, Bochuan Cao, Yuanpu Cao, Lu Lin, Prasenjit Mitra, and Jinghui Chen

arXiv preprint arXiv:2405.14023, 2024

Bib HTML

@article{zhang2024wordgame,
  author        = {Zhang, Tianrong and Cao, Bochuan and Cao, Yuanpu and Lin, Lu and Mitra, Prasenjit and Chen, Jinghui},
  title         = {{WordGame}: Efficient \& Effective {LLM} Jailbreak via Simultaneous Obfuscation in Query and Response},
  journal       = {arXiv preprint arXiv:2405.14023},
  eprint        = {2405.14023},
  archiveprefix = {arXiv},
  year          = {2024},
  category      = {preprint},
}

arXiv

Adversarially robust industrial anomaly detection through diffusion model

Yuanpu Cao, Lu Lin, and Jinghui Chen

arXiv preprint arXiv:2408.04839, 2024

Bib HTML

@article{cao2024adversarially,
  author        = {Cao, Yuanpu and Lin, Lu and Chen, Jinghui},
  title         = {Adversarially robust industrial anomaly detection through diffusion model},
  journal       = {arXiv preprint arXiv:2408.04839},
  eprint        = {2408.04839},
  archiveprefix = {arXiv},
  year          = {2024},
  category      = {preprint},
}

Conference & Journal Papers

2024

NeurIPS

Personalized Steering of Large Language Models: Versatile Steering Vectors Through Bi-directional Preference Optimization

Yuanpu Cao, Tianrong Zhang, Bochuan Cao, Ziyi Yin, Lu Lin, Fenglong Ma, and Jinghui Chen

In Proceedings of the 38th Annual Conference on Neural Information Processing Systems, 2024

Bib HTML

@inproceedings{cao2024personalized,
  author    = {Cao, Yuanpu and Zhang, Tianrong and Cao, Bochuan and Yin, Ziyi and Lin, Lu and Ma, Fenglong and Chen, Jinghui},
  title     = {Personalized Steering of Large Language Models: Versatile Steering Vectors Through Bi-directional Preference Optimization},
  booktitle = {Proceedings of the 38th Annual Conference on Neural Information Processing Systems},
  year      = {2024},
  category  = {conference},
}

ACL

Defending Against Alignment-Breaking Attacks via Robustly Aligned LLM

Bochuan Cao, Yuanpu Cao, Lu Lin, and Jinghui Chen

In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2024

Bib HTML

@inproceedings{cao2023defending,
  author    = {Cao, Bochuan and Cao, Yuanpu and Lin, Lu and Chen, Jinghui},
  title     = {Defending Against Alignment-Breaking Attacks via Robustly Aligned {LLM}},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2024},
  category  = {conference},
}

ACL

Jailbreak Open-Sourced Large Language Models via Enforced Decoding

Hangfan Zhang, Zhimeng Guo, Huaisheng Zhu, Bochuan Cao, Lu Lin, Jinyuan Jia, Jinghui Chen, and Dinghao Wu

In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2024

Bib HTML

@inproceedings{zhang2024jailbreak,
  author    = {Zhang, Hangfan and Guo, Zhimeng and Zhu, Huaisheng and Cao, Bochuan and Lin, Lu and Jia, Jinyuan and Chen, Jinghui and Wu, Dinghao},
  title     = {Jailbreak Open-Sourced Large Language Models via Enforced Decoding},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2024},
  category  = {conference},
}

ICML

Graph Adversarial Diffusion Convolution

Songtao Liu, Jinghui Chen, Tianfan Fu, Lu Lin, Marinka Zitnik, and Dinghao Wu

In Proceedings of the 41st International Conference on Machine Learning, 2024

Bib HTML

@inproceedings{liu2024graph,
  author    = {Liu, Songtao and Chen, Jinghui and Fu, Tianfan and Lin, Lu and Zitnik, Marinka and Wu, Dinghao},
  title     = {Graph Adversarial Diffusion Convolution},
  booktitle = {Proceedings of the 41st International Conference on Machine Learning},
  year      = {2024},
  category  = {conference},
}

ICLR

Backdoor Contrastive Learning via Bi-level Trigger Optimization

Weiyu Sun, Xinyu Zhang, Hao Lu, Ying-Cong Chen, Ting Wang, Jinghui Chen, and Lu Lin

In Proceedings of the 12th International Conference on Learning Representations, 2024

Bib HTML

@inproceedings{sun2024backdoor,
  author    = {Sun, Weiyu and Zhang, Xinyu and Lu, Hao and Chen, Ying-Cong and Wang, Ting and Chen, Jinghui and Lin, Lu},
  title     = {Backdoor Contrastive Learning via Bi-level Trigger Optimization},
  booktitle = {Proceedings of the 12th International Conference on Learning Representations},
  year      = {2024},
  category  = {conference},
}

WWW

Globally Interpretable Graph Learning via Distribution Matching

Yi Nian*, Yurui Chang*, Wei Jin, and Lu Lin

In Proceedings of the Web Conference, 2024

Bib HTML

@inproceedings{nian2024globally,
  author    = {Nian*, Yi and Chang*, Yurui and Jin, Wei and Lin, Lu},
  title     = {Globally Interpretable Graph Learning via Distribution Matching},
  booktitle = {Proceedings of the Web Conference},
  year      = {2024},
  category  = {conference},
}

WWW

Graph Contrastive Learning via Interventional View Generation

Zengyi Wo, Minglai Shao, Wenjun Wang, Xuan Guo, and Lu Lin

In Proceedings of the Web Conference, 2024

Bib HTML

@inproceedings{wo2024graph,
  author    = {Wo, Zengyi and Shao, Minglai and Wang, Wenjun and Guo, Xuan and Lin, Lu},
  title     = {Graph Contrastive Learning via Interventional View Generation},
  booktitle = {Proceedings of the Web Conference},
  year      = {2024},
  category  = {conference},
}

2023

NeurIPS

A3FL: Adversarially Adaptive Backdoor Attacks to Federated Learning

Hangfan Zhang, Jinyuan Jia, Jinghui Chen, Lu Lin, and Dinghao Wu

In Proceedings of the 37th Conference on Neural Information Processing Systems, 2023

Bib HTML Code

@inproceedings{zhang2023a3fl,
  author    = {Zhang, Hangfan and Jia, Jinyuan and Chen, Jinghui and Lin, Lu and Wu, Dinghao},
  title     = {{A3FL}: Adversarially Adaptive Backdoor Attacks to Federated Learning},
  booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems},
  year      = {2023},
  category  = {conference},
}

ICML
FusionRetro: Molecule Representation Fusion via In-Context Learning for Retrosynthetic Planning

Songtao Liu, Zhengkai Tu, Minkai Xu, Zuobai Zhang, Lu Lin, Rex Ying, Jian Tang, Peilin Zhao, and Dinghao Wu

In Proceedings of the 40th International Conference on Machine Learning, 2023

Abs Bib HTML Code

Retrosynthetic planning aims to devise a complete multi-step synthetic route from starting materials to a target molecule. Current strategies use a decoupled approach of single-step retrosynthesis models and search algorithms, taking only the product as the input to predict the reactants for each planning step and ignoring valuable context information along the synthetic route. In this work, we propose a novel framework that utilizes context information for improved retrosynthetic planning. We view synthetic routes as reaction graphs and propose to incorporate context through three principled steps: encode molecules into embeddings, aggregate information over routes, and readout to predict reactants. Our approach is the first attempt to utilize in-context learning for retrosynthesis prediction in retrosynthetic planning. The entire framework can be efficiently optimized in an end-to-end fashion and produce more practical and accurate predictions. Comprehensive experiments demonstrate that by fusing in the context information over routes, our model significantly improves the performance of retrosynthetic planning over baselines that are not context-aware, especially for long synthetic routes. Code is available at Github.
@inproceedings{liu2023fusionretro,
  author    = {Liu, Songtao and Tu, Zhengkai and Xu, Minkai and Zhang, Zuobai and Lin, Lu and Ying, Rex and Tang, Jian and Zhao, Peilin and Wu, Dinghao},
  title     = {{FusionRetro}: Molecule Representation Fusion via In-Context Learning for Retrosynthetic Planning},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  year      = {2023},
  category  = {conference},
}
ICML
Graph Contrastive Backdoor Attacks

Hangfan Zhang, Jinghui Chen, Lu Lin, Jinyuan Jia, and Dinghao Wu

In Proceedings of the 40th International Conference on Machine Learning, 2023

Abs Bib HTML

Graph Contrastive Learning (GCL) has attracted considerable interest due to its impressive node representation learning capability. Despite the wide application of GCL techniques, little attention has been paid to the security of GCL. In this paper, we systematically study the vulnerability of GCL in the presence of malicious backdoor adversaries. In particular, we propose *GCBA*, the first backdoor attack for graph contrastive learning. GCBA incorporates three attacks: poisoning, crafting, and natural backdoor, each targeting one stage of the GCL pipeline. We formulate our attacks as optimization problems and solve them with a novel discrete optimization technique to overcome the discrete nature of graph-structured data. By extensively evaluating GCBA on multiple datasets and GCL methods, we show that our attack can achieve high attack success rates while preserving stealthiness. We further consider potential countermeasures to our attack and conclude that existing defenses are insufficient to mitigate GCBA. We show that as a complex paradigm involving data and model republishing, GCL is vulnerable to backdoor attacks, and specifically designed defenses are needed to mitigate the backdoor attacks on GCL.
@inproceedings{zhang2023graph,
  author    = {Zhang, Hangfan and Chen, Jinghui and Lin, Lu and Jia, Jinyuan and Wu, Dinghao},
  title     = {Graph Contrastive Backdoor Attacks},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  year      = {2023},
  category  = {conference},
}
ICLR
Spectral augmentation for self-supervised learning on graphs

Lu Lin, Jinghui Chen, and Hongning Wang

In Proceedings of the 11th International Conference on Learning Representations, 2023

Abs Bib HTML Code

Graph contrastive learning (GCL), as an emerging self-supervised learning technique on graphs, aims to learn representations via instance discrimination. Its performance heavily relies on graph augmentation to reflect invariant patterns that are robust to small perturbations; yet it still remains unclear about what graph invariance GCL should capture. Recent studies mainly perform topology augmentations in a uniformly random manner in the spatial domain, ignoring its influence on the intrinsic structural properties embedded in the spectral domain. In this work, we aim to find a principled way for topology augmentations by exploring the invariance of graphs from the spectral perspective. We develop spectral augmentation which guides topology augmentations by maximizing the spectral change. Extensive experiments on both graph and node classification tasks demonstrate the effectiveness of our method in self-supervised representation learning. The proposed method also brings promising generalization capability in transfer learning, and is equipped with intriguing robustness property under adversarial attacks. Our study sheds light on a general principle for graph topology augmentation.
@inproceedings{lin2023spectral,
  author    = {Lin, Lu and Chen, Jinghui and Wang, Hongning},
  title     = {Spectral augmentation for self-supervised learning on graphs},
  booktitle = {Proceedings of the 11th International Conference on Learning Representations},
  year      = {2023},
  category  = {conference},
}

2022

KDD
Graph Structural Attack by Perturbing Spectral Distance

Lu Lin, Ethan Blaser, and Hongning Wang

In Proceedings of the 28th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, 2022

Abs Bib HTML Code

Graph Convolutional Networks (GCNs) have fueled a surge of interest due to their superior performance on graph learning tasks, but are also shown vulnerability to adversarial attacks. In this paper, an effective graph structural attack is investigated to disrupt graph spectral filters in the Fourier domain. We define the spectral distance based on the eigenvalues of graph Laplacian to measure the disruption of spectral filters. We then generate edge perturbations by simultaneously maximizing a task-specific attack objective and the proposed spectral distance. The experiments demonstrate remarkable effectiveness of the proposed attack in the white-box setting at both training and test time. Our qualitative analysis shows the connection between the attack behavior and the imposed changes on the spectral distribution, which provides empirical evidence that maximizing spectral distance is an effective manner to change the structural property of graphs in the spatial domain and perturb the frequency components in the Fourier domain.
@inproceedings{lin2021graph,
  author    = {Lin, Lu and Blaser, Ethan and Wang, Hongning},
  title     = {Graph Structural Attack by Perturbing Spectral Distance},
  booktitle = {Proceedings of the 28th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  year      = {2022},
  category  = {conference},
}
ICML
Communication-Efficient Adaptive Federated Learning

Yujia Wang, Lu Lin, and Jinghui Chen

In Proceedings of the 39th International Conference on Machine Learning, 2022

Abs Bib HTML

Due to the explosion in the size of the training datasets, distributed learning has received growing interest in recent years. One of the major bottlenecks is the large communication cost between the central server and the local workers. While error feedback compression has been proven to be successful in reducing communication costs with stochastic gradient descent (SGD), there are much fewer attempts in building communication-efficient adaptive gradient methods with provable guarantees, which are widely used in training large-scale machine learning models. In this paper, we propose a new communication-compressed AMSGrad for distributed nonconvex optimization problem, which is provably efficient. Our proposed distributed learning framework features an effective gradient compression strategy and a worker-side model update design. We prove that the proposed communication-efficient distributed adaptive gradient method converges to the first-order stationary point with the same iteration complexity as uncompressed vanilla AMSGrad in the stochastic nonconvex optimization setting. Experiments on various benchmarks back up our theory.
@inproceedings{wang2022communication,
  author    = {Wang, Yujia and Lin, Lu and Chen, Jinghui},
  title     = {Communication-Efficient Adaptive Federated Learning},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  year      = {2022},
  category  = {conference},
}
AISTATS
Communication-Compressed Adaptive Gradient Method for Distributed Nonconvex Optimization

Yujia Wang, Lu Lin, and Jinghui Chen

In Proceedings of the 25th International Conference on Artificial Intelligence and Statistics, 2022

Abs Bib HTML

Due to the explosion in the size of the training datasets, distributed learning has received growing interest in recent years. One of the major bottlenecks is the large communication cost between the central server and the local workers. While error feedback compression has been proven to be successful in reducing communication costs with stochastic gradient descent (SGD), there are much fewer attempts in building communication-efficient adaptive gradient methods with provable guarantees, which are widely used in training large-scale machine learning models. In this paper, we propose a new communication-compressed AMSGrad for distributed nonconvex optimization problem, which is provably efficient. Our proposed distributed learning framework features an effective gradient compression strategy and a worker-side model update design. We prove that the proposed communication-efficient distributed adaptive gradient method converges to the first-order stationary point with the same iteration complexity as uncompressed vanilla AMSGrad in the stochastic nonconvex optimization setting. Experiments on various benchmarks back up our theory.
@inproceedings{wang2021communication,
  author    = {Wang, Yujia and Lin, Lu and Chen, Jinghui},
  title     = {Communication-Compressed Adaptive Gradient Method for Distributed Nonconvex Optimization},
  booktitle = {Proceedings of the 25th International Conference on Artificial Intelligence and Statistics},
  year      = {2022},
  category  = {conference},
}
AAAI Workshop
Communication-Compressed Adaptive Gradient Method for Distributed Nonconvex Optimization

Yujia Wang, Lu Lin, and Jinghui Chen

In International Workshop on Trustable, Verifiable and Auditable Federated Learning in Conjunction with AAAI, 2022

Abs Bib HTML

Due to the explosion in the size of the training datasets, distributed learning has received growing interest in recent years. One of the major bottlenecks is the large communication cost between the central server and the local workers. While error feedback compression has been proven to be successful in reducing communication costs with stochastic gradient descent (SGD), there are much fewer attempts in building communication-efficient adaptive gradient methods with provable guarantees, which are widely used in training large-scale machine learning models. In this paper, we propose a new communication-compressed AMSGrad for distributed nonconvex optimization problem, which is provably efficient. Our proposed distributed learning framework features an effective gradient compression strategy and a worker-side model update design. We prove that the proposed communication-efficient distributed adaptive gradient method converges to the first-order stationary point with the same iteration complexity as uncompressed vanilla AMSGrad in the stochastic nonconvex optimization setting. Experiments on various benchmarks back up our theory.
@inproceedings{wang2021communication2,
  author    = {Wang, Yujia and Lin, Lu and Chen, Jinghui},
  title     = {Communication-Compressed Adaptive Gradient Method for Distributed Nonconvex Optimization},
  booktitle = {International Workshop on Trustable, Verifiable and Auditable Federated Learning in Conjunction with AAAI},
  year      = {2022},
  category  = {conference},
}
WWW
Unbiased Graph Embedding with Biased Graph Observations

Nan Wang*, Lu Lin*, Jundong Li, and Hongning Wang

In Proceedings of the Web Conference, 2022

Abs Bib HTML Code

Graph embedding techniques are pivotal in real-world machine learning tasks that operate on graph-structured data, such as social recommendation and protein structure modeling. Embeddings are mostly performed on the node level for learning representations of each node. Since the formation of a graph is inevitably affected by certain sensitive node attributes, the node embeddings can inherit such sensitive information and introduce undesirable biases in downstream tasks. Most existing works impose ad-hoc constraints on the node embeddings to restrict their distributions for unbiasedness/fairness, which however compromise the utility of the resulting embeddings. In this paper, we propose a principled new way for unbiased graph embedding by learning node embeddings from an underlying bias-free graph, which is not influenced by sensitive node attributes. Motivated by this new perspective, we propose two complementary methods for uncovering such an underlying graph, with the goal of introducing minimum impact on the utility of the embeddings. Both our theoretical justification and extensive experimental comparisons against state-of-the-art solutions demonstrate the effectiveness of our proposed methods.
@inproceedings{wang2021unbiased,
  author    = {Wang*, Nan and Lin*, Lu and Li, Jundong and Wang, Hongning},
  title     = {Unbiased Graph Embedding with Biased Graph Observations},
  booktitle = {Proceedings of the Web Conference},
  year      = {2022},
  category  = {conference},
}
WSDM
Graph Embedding with Hierarchical Attentive Membership

Lu Lin, Ethan Blaser, and Hongning Wang

In Proceedings of the 15th International Conference on Web Search and Data Mining, 2022

Abs Bib HTML

This paper studies a remarkable property of graphs which is the latent hierarchical grouping of nodes, where each node manifests its membership to a specific group based on the context composed by its neighboring nodes. When modeling the neighborhood structure for graph representation learning, most prior works ignore such latent groups and nodes’ membership to different groups, not to mention the hierarchy. Thus, they fall short of delivering a comprehensive understanding of the nodes under different contexts in a graph. In this paper, we propose a novel hierarchical attentive membership model for graph embedding, where the latent memberships for each node are dynamically discovered based on its neighboring context. Both group-level and individual-level attentions are performed when aggregating neighboring states to generate node embeddings. We introduce structural constraints to explicitly regularize the inferred memberships of each node, such that a well-defined hierarchical grouping structure is captured. The proposed model outperformed a set of state-of-the-art graph embedding solutions on node classification and link prediction tasks in a variety of graphs including citation networks and social networks. Qualitative evaluations visualize the learned node embeddings along with the inferred memberships, which proved the concept of membership hierarchy and enables explainable embedding learning in graphs.
@inproceedings{lin2021grapi,
  author    = {Lin, Lu and Blaser, Ethan and Wang, Hongning},
  title     = {Graph Embedding with Hierarchical Attentive Membership},
  booktitle = {Proceedings of the 15th International Conference on Web Search and Data Mining},
  year      = {2022},
  category  = {conference},
}

2020

KDD
Graph Attention Networks over Edge Content-based Channels

Lu Lin and Hongning Wang

In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, 2020

Abs Bib HTML Code

Edges play a crucial role in passing information on a graph, especially when they carry textual content reflecting semantics behind how nodes are linked and interacting with each other. In this paper, we propose a channel-aware attention mechanism enabled by edge text content when aggregating information from neighboring nodes; and we realize this mechanism in a graph autoencoder framework. Edge text content is encoded as low-dimensional mixtures of latent topics, which serve as semantic channels for topic-level information passing on edges. We embed nodes and topics in the same latent space to capture their mutual dependency when decoding the structural and textual information on graph. We evaluated the proposed model on Yelp user-item bipartite graph and StackOverflow user-user interaction graph. The proposed model outperformed a set of baselines on link prediction and content prediction tasks. Qualitative evaluations also demonstrated the descriptive power of the learnt node embeddings, showing its potential as an interpretable representation of graphs.
@inproceedings{lin2020graph,
  author    = {Lin, Lu and Wang, Hongning},
  title     = {Graph Attention Networks over Edge Content-based Channels},
  booktitle = {Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  year      = {2020},
  category  = {conference},
}
WSDM
JNET: Learning User Representations via Joint Network Embedding and Topic Embedding

Lin Gong, Lu Lin, Weihao Song, and Hongning Wang

In Proceedings of the 13th International Conference on Web Search and Data Mining, 2020

Abs Bib HTML Code

User representation learning is vital to capture diverse user preferences, while it is also challenging as user intents are latent and scattered among complex and different modalities of user-generated data, thus, not directly measurable. Inspired by the concept of user schema in social psychology, we take a new perspective to perform user representation learning by constructing a shared latent space to capture the dependency among different modalities of user-generated data. Both users and topics are embedded to the same space to encode users’ social connections and text content, to facilitate joint modeling of different modalities, via a probabilistic generative framework. We evaluated the proposed solution on large collections of Yelp reviews and StackOverflow discussion posts, with their associated network structures. The proposed model outperformed several state-of-the-art topic modeling based user models with better predictive power in unseen documents, and state-of-the-art network embedding based user models with improved link prediction quality in unseen nodes. The learnt user representations are also proved to be useful in content recommendation, e.g., expert finding in StackOverflow.
@inproceedings{gong2020jnet,
  author    = {Gong, Lin and Lin, Lu and Song, Weihao and Wang, Hongning},
  title     = {{JNET}: Learning User Representations via Joint Network Embedding and Topic Embedding},
  booktitle = {Proceedings of the 13th International Conference on Web Search and Data Mining},
  year      = {2020},
  category  = {conference},
}

2019

BuildSys
Sequential Learning with Active Partial Labeling for Building Metadata

Lu Lin, Zheng Luo, Dezhi Hong, and Hongning Wang

In Proceedings of the 6th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation, 2019

Abs Bib HTML Code

Modern buildings are instrumented with thousands of sensing and control points. The ability to automatically extract the physical context of each point, e.g., the type, location, and relationship with other points, is the key to enabling building analytics at scale. However, this process is costly as it usually requires domain expertise with a deep understanding of the building system and its point naming scheme. In this study, we aim to reduce the human effort required for mapping sensors to their context, i.e., metadata mapping. We formulate the problem as a sequential labeling process and use the conditional random field to exploit the regular and dependent structures observed in the metadata. We develop a suite of active learning strategies to adaptively select the most informative subsequences in point names for human labeling, which significantly reduces the inputs from domain experts. We evaluated our approach on three different buildings and observed encouraging performance in metadata mapping from the proposed solution.
@inproceedings{lin2019sequential,
  author    = {Lin, Lu and Luo, Zheng and Hong, Dezhi and Wang, Hongning},
  title     = {Sequential Learning with Active Partial Labeling for Building Metadata},
  booktitle = {Proceedings of the 6th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation},
  year      = {2019},
  category  = {conference},
}
WSDM
Learning Personalized Topical Compositions with Item Response Theory

Lu Lin, Lin Gong, and Hongning Wang

In Proceedings of the 12th ACM International Conference on Web Search and Data Mining, 2019

Abs Bib HTML

A user-generated review document is a product between the item’s intrinsic properties and the user’s perceived composition of those properties. Without properly modeling and decoupling these two factors, one can hardly obtain any accurate user understanding nor item profiling from such user-generated data. In this paper, we study a new text mining problem that aims at differentiating a user’s subjective composition of topical content in his/her review document from the entity’s intrinsic properties. Motivated by the Item Response Theory (IRT), we model each review document as a user’s detailed response to an item, and assume the response is jointly determined by the individuality of the user and the property of the item. We model the text-based response with a generative topic model, in which we characterize the items’ properties and users’ manifestations of them in a low-dimensional topic space. Via posterior inference, we separate and study these two components over a collection of review documents. Extensive experiments on two large collections of Amazon and Yelp review data verified the effectiveness of the proposed solution: it outperforms the state-of-art topic models with better predictive power in unseen documents, which is directly translated into improved performance in item recommendation and item summarization tasks.
@inproceedings{lin2019learning,
  author    = {Lin, Lu and Gong, Lin and Wang, Hongning},
  title     = {Learning Personalized Topical Compositions with Item Response Theory},
  booktitle = {Proceedings of the 12th ACM International Conference on Web Search and Data Mining},
  year      = {2019},
  category  = {conference},
}

2017

TKDE
Road Traffic Speed Prediction: A Probabilistic Model Fusing Multi-source Data

Lu Lin, Jianxin Li, Feng Chen, Jieping Ye, and Jinpeng Huai

IEEE Transactions on Knowledge and Data Engineering, 2017

Abs Bib HTML

Road traffic speed prediction is a challenging problem in intelligent transportation system (ITS) and has gained increasing attentions. Existing works are mainly based on raw speed sensing data obtained from infrastructure sensors or probe vehicles, which, however, are limited by expensive cost of sensor deployment and maintenance. With sparse speed observations, traditional methods based only on speed sensing data are insufficient, especially when emergencies like traffic accidents occur. To address the issue, this paper aims to improve the road traffic speed prediction by fusing traditional speed sensing data with new-type “sensing” data from cross domain sources, such as tweet sensors from social media and trajectory sensors from map and traffic service platforms. Jointly modeling information from different datasets brings many challenges, including location uncertainty of low-resolution data, language ambiguity of traffic description in texts, and heterogeneity of cross-domain data. In response to these challenges, we present a unified probabilistic framework, called Topic-Enhanced Gaussian Process Aggregation Model (TEGPAM), consisting of three components, i.e., location disaggregation model, traffic topic model, and traffic speed Gaussian Process model, which integrate new-type data with traditional data. Experiments on real world data from two large cities validate the effectiveness and efficiency of our model.
@article{lin2017road,
  author   = {Lin, Lu and Li, Jianxin and Chen, Feng and Ye, Jieping and Huai, Jinpeng},
  title    = {Road Traffic Speed Prediction: A Probabilistic Model Fusing Multi-source Data},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2017},
  category = {conference},
}

2014

UCC
Opinion Mining and Sentiment Analysis in Social Networks: A Retweeting Structure-aware Approach

Lu Lin, Jianxin Li, Richong Zhang, Weiren Yu, and Chenggen Sun

In Proceedings of the 7th IEEE/ACM International Conference on Utility and Cloud Computing, 2014

Abs Bib HTML

Microblogs have become quick and easy online information sharing platforms with the explosive growth of online social media. Weibo, a Twitter-like microblog service in China, is characterized by timeliness and interactivity. A Weibo message carries the user’s views and sentiments, particularly forms a fission-like spreading structure while being retweeted. Such structure accelerates information diffusion, and reflects different topics and opinions as well. However, current researches mainly focus on sentiment classification, which neither efficiently combine tree-like retweeting structure nor analyze opinion evolutions with a holistic view. In light of this, we build an opinion descriptive model, and propose an opinion mining method based on this model. With a microblog-oriented sentiment lexicon being constructed, a lexicon-based sentiment orientation analysis algorithm is designed to classify sentiments. Finally, we design and implement a prototype which can mine opinions with respect to retweeting tree structures and retweeting comments.
@inproceedings{lin2014opinion,
  author    = {Lin, Lu and Li, Jianxin and Zhang, Richong and Yu, Weiren and Sun, Chenggen},
  title     = {Opinion Mining and Sentiment Analysis in Social Networks: A Retweeting Structure-aware Approach},
  booktitle = {Proceedings of the 7th IEEE/ACM International Conference on Utility and Cloud Computing},
  year      = {2014},
  category  = {conference},
}