代码语言
.
CSharp
.
JS
Java
Asp.Net
C
MSSQL
PHP
Css
PLSQL
Python
Shell
EBS
ASP
Perl
ObjC
VB.Net
VBS
MYSQL
GO
Delphi
AS
DB2
Domino
Rails
ActionScript
Scala
代码分类
文件
系统
字符串
数据库
网络相关
图形/GUI
多媒体
算法
游戏
Jquery
Extjs
Android
HTML5
菜单
网页交互
WinForm
控件
企业应用
安全与加密
脚本/批处理
开放平台
其它
【
Java
】
基于LRU算法的URL过滤器
作者:
lugechao
/ 发布于
2014/1/23
/
344
package Processer;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Random;
import java.util.Map.Entry;

import database.DatabaseCon;

/**
 * URL filter that remembers which (ip, url) pairs have already been seen.
 *
 * <p>Per-IP URL maps are kept in memory and spilled to the {@code visited}
 * table through the supplied JDBC connection when the in-memory budget is
 * exceeded. The IP with the fewest hits since the last load is evicted first.
 *
 * <p>Both the IP and the URL are reduced to {@link String#hashCode()} before
 * being used as keys, so two different strings with the same hash code are
 * treated as the same entry.
 *
 * <p>NOTE(review): the SQL text uses {@code use crawler;} and
 * {@code select top N}, which looks like SQL Server dialect - confirm against
 * the target database.
 */
public class LRUFilter {
    /** Number of URL entries currently held in memory across all IPs. */
    private int CurrentCacheSize;
    /** Upper bound for {@link #CurrentCacheSize}. */
    private int MAX;
    /** Per-IP threshold used when deciding whether to cache or go straight to the database. */
    private int MAXIP;
    /** In-memory cache: ip hash -> (url hash -> record). */
    private HashMap<Integer, HashMap<Integer, DBElement>> rootMap;
    /** ip hash -> number of rows recorded for that ip in the database. */
    private HashMap<Integer, Int> DBMap;
    private Connection con;
    /** Eviction queue; sorted by hit count whenever a victim is needed. */
    private LinkedList<Element> lQueue;
    /** ip hash -> its entry in {@link #lQueue}, for direct access. */
    private HashMap<Integer, Element> slQueue;

    /**
     * Creates a filter with a total capacity of 20000 entries and a per-IP
     * threshold of 10000.
     *
     * @param c open JDBC connection used for all database access
     */
    public LRUFilter(Connection c) {
        this(20000, 10000, c);
    }

    /**
     * Creates a filter and preloads the per-IP row counts from the database.
     *
     * @param capable maximum number of URL entries kept in memory
     * @param oneip   per-IP threshold
     * @param c       open JDBC connection used for all database access
     */
    public LRUFilter(int capable, int oneip, Connection c) {
        MAX = capable;
        CurrentCacheSize = 0;
        MAXIP = oneip;
        rootMap = new HashMap<Integer, HashMap<Integer, DBElement>>();
        DBMap = new HashMap<Integer, Int>();
        lQueue = new LinkedList<Element>();
        slQueue = new HashMap<Integer, Element>();
        con = c;
        initialCache();
    }

    /**
     * Looks up {@code url} in the cached map of {@code ip} and records the visit.
     *
     * @return {@code true} if the url was already known, {@code false} if it was new
     */
    private boolean hit(HashMap<Integer, DBElement> urlMap, int ip, int url) {
        // The queue entry can be missing if loading this ip pushed the cache over
        // budget and the ip itself was evicted again; skip the bump in that case.
        Element queued = slQueue.get(ip);
        if (queued != null) {
            queued.hits++;
        }

        DBElement dbe = urlMap.get(url);
        if (dbe != null) {
            dbe.hits++;
            if (dbe.flag == 0) {
                dbe.flag = 1; // unchanged -> changed; a new record (2) stays new
            }
            return true;
        }

        if (DBMap.get(ip).value < MAXIP) {
            // Few rows stored for this ip: remember the url in memory as a new record.
            urlMap.put(url, new DBElement(1, ip, 2));
            CurrentCacheSize++;
            evictUntilFits(0);
            if (urlMap.size() > MAXIP) {
                // Flush this ip and reload it from the database.
                toDB(ip);
                fromDB(ip);
            }
            return false;
        }

        // Too many rows for this ip: bypass the cache and hit the database directly.
        boolean contain = toDBDirect(url, new DBElement(1, ip, 0));
        Int count = DBMap.get(ip);
        if (!contain && count != null) {
            count.value += 1;
        }
        return contain;
    }

    /** Loads the ip's map from the database and then performs the lookup. */
    private boolean notHit(int ip, int url) {
        return hit(fromDB(ip), ip, url);
    }

    /**
     * Reads every keyip and its row count from the {@code visited} table into
     * {@link #DBMap}.
     */
    private void initialCache() {
        Statement stm = null;
        ResultSet rs = null;
        try {
            String sql = "use crawler;select keyip,count(keyip) from visited group by keyip";
            stm = con.createStatement();
            rs = stm.executeQuery(sql);
            while (rs.next()) {
                DBMap.put(rs.getInt(1), new Int(rs.getInt(2)));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeAll(rs, stm);
        }
    }

    /**
     * Filters a url.
     *
     * @param ip  the ip the url belongs to
     * @param url the url to test
     * @return {@code true} if the url has been seen before, {@code false} otherwise
     *         (in which case it is recorded)
     */
    public boolean contain(String ip, String url) {
        int keyip = ip.hashCode();
        int keyurl = url.hashCode();
        HashMap<Integer, DBElement> urlMap = rootMap.get(keyip);
        if (urlMap != null) {
            return hit(urlMap, keyip, keyurl);
        }
        return notHit(keyip, keyurl);
    }

    /**
     * Writes cached urls to the database.
     *
     * <p>If {@code ip} is 0, the queue is sorted by hit count, the entry with the
     * fewest hits is taken and all urls of that ip are written out. Otherwise the
     * urls of the given ip are written out. In both cases the ip is dropped from
     * the cache and the queue, and its row count in {@link #DBMap} is updated.
     *
     * <p>NOTE(review): an ip whose hash code is 0 is indistinguishable from the
     * "evict least-hit" request.
     *
     * @return {@code false} only when {@code ip} is 0 and the queue is empty
     */
    private boolean toDB(int ip) {
        if (ip == 0) {
            Collections.sort(lQueue, new MyComparator());
            Element victim = lQueue.poll();
            if (victim == null) {
                return false; // empty
            }
            ip = victim.ip;
            slQueue.remove(ip);
            flushIp(ip);
        } else {
            flushIp(ip);
            lQueue.remove(slQueue.remove(ip));
        }
        return true;
    }

    /** Removes the ip's url map from the cache, writes it out and updates the counters. */
    private void flushIp(int ip) {
        HashMap<Integer, DBElement> urlMap = rootMap.remove(ip);
        if (urlMap == null) {
            return;
        }
        int num = writeToDB(urlMap);
        Int count = DBMap.get(ip);
        if (count != null) {
            count.value += num;
        }
        CurrentCacheSize -= urlMap.size();
    }

    /**
     * Evicts least-hit ips until {@code incoming} more entries fit in the cache,
     * or until there is nothing left to evict.
     */
    private void evictUntilFits(int incoming) {
        while (incoming + CurrentCacheSize > MAX) {
            if (!toDB(0)) {
                break; // queue is empty; looping further cannot free anything
            }
        }
    }

    /**
     * Loads the url map of {@code ip} into the cache, making room first, and
     * registers the ip in the eviction queue. Hit counters of all queued ips are
     * reset to 0.
     *
     * @return the url map for the ip (empty if the ip was unknown)
     */
    private HashMap<Integer, DBElement> fromDB(int ip) {
        HashMap<Integer, DBElement> urlMap;
        Int count = DBMap.get(ip);
        if (count != null) {
            urlMap = readFromDB(ip, count.value > MAXIP);
        } else {
            urlMap = new HashMap<Integer, DBElement>();
            DBMap.put(ip, new Int(0));
        }

        evictUntilFits(urlMap.size());

        for (Element queued : lQueue) {
            queued.hits = 0;
        }
        Element e = new Element(ip, 0);
        lQueue.add(e);
        slQueue.put(ip, e);
        rootMap.put(ip, urlMap);
        CurrentCacheSize += urlMap.size();

        evictUntilFits(0);
        return urlMap;
    }

    /**
     * Writes a url map to the database: records flagged 2 are inserted, records
     * flagged 1 are updated, records flagged 0 are skipped.
     *
     * @return the number of records queued for insertion
     */
    private int writeToDB(HashMap<Integer, DBElement> urlDBMap) {
        int num = 0;
        PreparedStatement insertStm = null;
        PreparedStatement updateStm = null;
        try {
            boolean insertAble = false;
            insertStm = con.prepareStatement("use crawler;insert into visited values(?,?,?);");
            for (Entry<Integer, DBElement> entry : urlDBMap.entrySet()) {
                int keyurl = entry.getKey();
                DBElement e = entry.getValue();
                if (e.flag == 2) {
                    insertStm.setInt(1, keyurl);
                    insertStm.setInt(2, e.hits);
                    insertStm.setInt(3, e.keyip);
                    insertStm.addBatch();
                    num++;
                    insertAble = true;
                }
            }
            if (insertAble) {
                insertStm.executeBatch();
            }
            insertStm.close();
            insertStm = null;

            boolean updateAble = false;
            updateStm = con.prepareStatement("use crawler;update visited set hits=? where keyurl=?;");
            for (Entry<Integer, DBElement> entry : urlDBMap.entrySet()) {
                int keyurl = entry.getKey();
                DBElement e = entry.getValue();
                if (e.flag == 1) {
                    updateStm.setInt(1, e.hits);
                    updateStm.setInt(2, keyurl);
                    updateStm.addBatch();
                    updateAble = true;
                }
            }
            if (updateAble) {
                updateStm.executeBatch();
            }
            updateStm.close();
            updateStm = null;

            con.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Only non-null if an exception interrupted the normal close above.
            closeAll(null, insertStm);
            closeAll(null, updateStm);
        }
        return num;
    }

    /**
     * Reads the urls of an ip from the database.
     *
     * @param ip     ip hash to load
     * @param exceed if {@code true}, only {@code MAXIP/2} rows are read
     * @return url hash -> record; empty if nothing was found or the query failed
     */
    public HashMap<Integer, DBElement> readFromDB(int ip, boolean exceed) {
        HashMap<Integer, DBElement> urlMap = new HashMap<Integer, DBElement>();
        int count = MAXIP / 2;
        String sql;
        if (exceed) {
            sql = "select top " + count + " keyurl,hits from visited where keyip=?;";
        } else {
            sql = "select keyurl,hits from visited where keyip=?;";
        }

        PreparedStatement stm = null;
        ResultSet rs = null;
        try {
            stm = con.prepareStatement(sql);
            stm.setInt(1, ip);
            rs = stm.executeQuery();
            while (rs.next()) {
                urlMap.put(rs.getInt(1), new DBElement(rs.getInt(2)));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeAll(rs, stm);
        }
        return urlMap;
    }

    /**
     * Records a url directly in the database, bypassing the cache: an existing
     * row has its hits increased, otherwise a new row is inserted.
     *
     * @return {@code true} if a row for the url already existed
     */
    private boolean toDBDirect(int keyurl, DBElement dbe) {
        boolean contain = false;
        Statement stm = null;
        ResultSet rs = null;
        try {
            stm = con.createStatement();
            rs = stm.executeQuery("use crawler;select hits from visited where keyurl=" + keyurl + ";");
            String sql;
            if (rs.next()) {
                contain = true;
                int hits = rs.getInt(1) + dbe.hits;
                sql = "use crawler;update visited set hits=" + hits + " where keyurl=" + keyurl + ";";
            } else {
                contain = false;
                sql = "use crawler;insert into visited values(" + keyurl + "," + dbe.hits + "," + dbe.keyip + ");";
            }
            stm.executeUpdate(sql);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeAll(rs, stm);
        }
        return contain;
    }

    /** Writes the whole cache to the database, one ip at a time, until the queue is empty. */
    public void store() {
        while (toDB(0)) {
            // keep flushing
        }
    }

    /** Closes the given JDBC objects, ignoring nulls; close failures are printed, not thrown. */
    private static void closeAll(ResultSet rs, Statement stm) {
        if (rs != null) {
            try {
                rs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (stm != null) {
            try {
                stm.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /*
     * tool classes
     */

    /** Eviction-queue entry: an ip and the hits it received since the last reset. */
    private static class Element {
        public Element(int i, int h) {
            ip = i;
            hits = h;
        }

        public int ip;
        public int hits;
    }

    /** Mutable int holder so counters stored in a map can be updated in place. */
    private static class Int {
        public Int(int v) {
            value = v;
        }

        public int value = 0;
    }

    /** One url record as cached in memory. */
    private static class DBElement {
        public DBElement(int h, int k, int f) {
            hits = h;
            keyip = k;
            flag = f;
        }

        public DBElement(int h) {
            hits = h;
        }

        public int hits = 0;
        public int keyip = 0;
        /*
         * 0 stands for not changed
         * 1 stands for changed
         * 2 stands for a new record
         */
        public int flag = 0;
    }

    /** Orders queue entries by ascending hit count. */
    private static class MyComparator implements Comparator<Element> {
        public int compare(Element e1, Element e2) {
            if (e1.hits < e2.hits) {
                return -1;
            } else if (e1.hits > e2.hits) {
                return 1;
            } else {
                return 0;
            }
        }
    }
}
试试其它关键字
URL过滤器
同语言下
.
List 切割成几份 工具类
.
一行一行读取txt的内容
.
Java PDF转换成图片并输出给前台展示
.
java 多线程框架
.
double类型如果小数点后为零则显示整数否则保留两位小
.
将图片转换为Base64字符串公共类抽取
.
sqlParser 处理SQL(增删改查) 替换schema 用于多租户
.
JAVA 月份中的第几周处理 1-7属于第一周 依次类推 29-
.
java计算两个经纬度之间的距离
.
输入时间参数计算年龄
可能有用的
.
C#实现的html内容截取
.
List 切割成几份 工具类
.
SQL查询 多列合并成一行用逗号隔开
.
一行一行读取txt的内容
.
C#动态修改文件夹名称(FSO实现,不移动文件)
.
c# 移动文件或文件夹
.
c#图片添加水印
.
Java PDF转换成图片并输出给前台展示
.
网站后台修改图片尺寸代码
.
处理大图片在缩略图时的展示
lugechao
贡献的其它代码
(
2
)
.
用于绑定控制列表、表格控件
.
基于LRU算法的URL过滤器
Copyright © 2004 - 2024 dezai.cn. All Rights Reserved
站长博客
粤ICP备13059550号-3