% Journal article: The Nucleus, vol. 50, no. 3 (2013). The abstractNote field is
% a non-standard export field; standard bibliography styles ignore it.
@article{Ghaffar_Shahbaz_Mahmood_2013,
  author       = {Ghaffar, A. and Shahbaz, M. and Mahmood, W.},
  title        = {Optimal Sampling Strategy for Data Mining},
  journal      = {The Nucleus},
  year         = {2013},
  month        = aug,
  volume       = {50},
  number       = {3},
  pages        = {219--228},
  url          = {http://www.thenucleuspak.org.pk/index.php/Nucleus/article/view/749},
  abstractNote = {Latest technology like Internet, corporate intranets, data warehouses, ERP's, satellites, digital sensors, embedded systems, mobiles networks all are generating such a massive amount of data that it is getting very difficult to analyze and understand all these data, even using data mining tools. Huge datasets are becoming a difficult challenge for classification algorithms. With increasing amounts of data, data mining algorithms are getting slower and analysis is getting less interactive. Sampling can be a solution. Using a fraction of computing resources, Sampling can often provide same level of accuracy. The process of sampling requires much care because there are many factors involved in the determination of correct sample size. The approach proposed in this paper tries to find a solution to this problem. Based on a statistical formula, after setting some parameters, it returns a sample size called ``sufficient sample size'', which is then selected through probability sampling. Results indicate the usefulness of this technique in coping with the problem of huge datasets.},
}