「数据挖掘」Apriori算法实现

只是一个应付作业的小程序，用 Python 简单实现了 Apriori 算法的逻辑。
题目是《Mining of Massive Datasets》的习题 6.2.6(a)。
apriori

from math import sqrt
import itertools

def constructor_filter(dataset, prevlist, step, min_support=5):
	"""Return the frequent itemsets of size ``step`` (one Apriori pass).

	Every ``step``-item combination of each basket is a candidate, but it is
	only counted when all of its (step-1)-item subsets appear in ``prevlist``.

	Args:
		dataset: iterable of baskets; each basket is a sized collection of
			sortable, hashable items.
		prevlist: iterable of sorted tuples, the frequent itemsets of size
			``step - 1``.
		step: size of the itemsets to build.
		min_support: minimum number of baskets a candidate must occur in to
			be kept. Defaults to 5, the threshold previously hard-coded here.

	Returns:
		A list of sorted tuples, one per itemset whose count reaches
		``min_support``.
	"""
	# Set membership is O(1); scanning a list for every subset is not.
	prev_frequent = set(prevlist)
	counts = {}
	for basket in dataset:
		if len(basket) < step:
			continue
		# Sorting the basket makes every candidate tuple canonical, so the
		# same itemset from different baskets hits the same dictionary key.
		for candidate in itertools.combinations(sorted(basket), step):
			if all(subset in prev_frequent
					for subset in itertools.combinations(candidate, step - 1)):
				counts[candidate] = counts.get(candidate, 0) + 1
	# .items() works on both Python 2 and Python 3, unlike .iteritems().
	return [itemset for itemset, count in counts.items()
			if count >= min_support]

def makedata(limit=100):
	"""Build the baskets: basket ``i`` holds every divisor of ``i``.

	Args:
		limit: number of the largest basket. Defaults to 100, the size
			previously hard-coded here.

	Returns:
		A list of ``limit + 1`` sets of ints. Index 0 is an empty placeholder
		so that basket ``i`` is stored at index ``i``.
	"""
	dataset = [set() for _ in range(limit + 1)]
	for i in range(1, limit + 1):
		j = 1
		# Divisors come in pairs (j, i // j), so j only has to run up to the
		# square root of i; the integer test avoids float rounding.
		while j * j <= i:
			if i % j == 0:
				dataset[i].add(j)
				# Floor division keeps the divisor an int on Python 3, where
				# "/" would produce a float such as 4.0.
				dataset[i].add(i // j)
			j += 1
	return dataset

def main():
	"""Print the frequent itemsets of every size for the divisor baskets.

	Step 1 counts single items directly; each later step asks
	``constructor_filter`` for the next larger itemsets and stops as soon as
	a step yields none. An itemset is kept when it occurs in at least 5
	baskets.
	"""
	dataset = makedata()
	# step 1: count how many baskets contain each single item
	step = 1
	counts = {}
	for basket in dataset:
		for item in basket:
			counts[item] = counts.get(item, 0) + 1
	# Wrap each item in a 1-tuple so all levels share the tuple representation.
	# .items() works on both Python 2 and Python 3, unlike .iteritems().
	frequent = [(item,) for item, count in counts.items() if count >= 5]
	# A single pre-formatted argument prints identically on Python 2 and 3;
	# the two spaces reproduce the separator of the old print statement.
	print('l%d:  %s' % (step, frequent))
	print('length of l%d: %d' % (step, len(frequent)))
	# other steps: grow the itemsets one item at a time
	while True:
		step += 1
		frequent = constructor_filter(dataset, frequent, step)
		if not frequent:
			break
		print('l%d:  %s' % (step, frequent))
		print('length of l%d: %d' % (step, len(frequent)))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
	main()
Tags : ,

0 thoughts on “「数据挖掘」Apriori算法实现”

发表评论

电子邮件地址不会被公开。 必填项已用*标注

Click the right image To submit your comment: